Add new metrics (#11374)

* Better batch block warning
* New metrics
* Revert "Better batch block warning"
  This reverts commit e21fcfcebe.
* More metrics
* Add activation and exit queues
* Gaz
2026-01-10 07:58:22 -05:00 · 2022-08-31 15:05:50 -07:00
parent b1e08307ed
commit c638e114db
11 changed files with 109 additions and 1 deletion
--- a/beacon-chain/blockchain/execution_engine.go
+++ b/beacon-chain/blockchain/execution_engine.go
@@ -89,7 +89,7 @@ func (s *Service) notifyForkchoiceUpdate(ctx context.Context, arg *notifyForkcho
 			}
 			return payloadID, nil
 		case execution.ErrInvalidPayloadStatus:
-			newPayloadInvalidNodeCount.Inc()
+			forkchoiceUpdatedInvalidNodeCount.Inc()
 			headRoot := arg.headRoot
 			if len(lastValidHash) == 0 {
 				lastValidHash = defaultLatestValidHash
--- a/beacon-chain/blockchain/log.go
+++ b/beacon-chain/blockchain/log.go
@@ -58,6 +58,7 @@ func logStateTransitionData(b interfaces.BeaconBlock) error {
 			return err
 		default:
 			log = log.WithField("txCount", len(txs))
+			txsPerSlotCount.Set(float64(len(txs)))
 		}

 	}
--- a/beacon-chain/blockchain/metrics.go
+++ b/beacon-chain/blockchain/metrics.go
@@ -158,10 +158,39 @@ var (
 		Name: "forkchoice_updated_optimistic_node_count",
 		Help: "Count the number of optimistic nodes after forkchoiceUpdated EE call",
 	})
+	forkchoiceUpdatedInvalidNodeCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "forkchoice_updated_invalid_node_count",
+		Help: "Count the number of invalid nodes after forkchoiceUpdated EE call",
+	})
+	txsPerSlotCount = promauto.NewGauge(prometheus.GaugeOpts{
+		Name: "txs_per_slot_count",
+		Help: "Count the number of txs per slot",
+	})
 	missedPayloadIDFilledCount = promauto.NewCounter(prometheus.CounterOpts{
 		Name: "missed_payload_id_filled_count",
 		Help: "",
 	})
+	processAttsElapsedTime = promauto.NewHistogram(
+		prometheus.HistogramOpts{
+			Name:    "process_attestations_milliseconds",
+			Help:    "Captures latency for process attestations (forkchoice) in milliseconds",
+			Buckets: []float64{1, 5, 20, 100, 500, 1000},
+		},
+	)
+	newAttHeadElapsedTime = promauto.NewHistogram(
+		prometheus.HistogramOpts{
+			Name:    "new_att_head_milliseconds",
+			Help:    "Captures latency for new attestation head in milliseconds",
+			Buckets: []float64{1, 5, 20, 100, 500, 1000},
+		},
+	)
+	newBlockHeadElapsedTime = promauto.NewHistogram(
+		prometheus.HistogramOpts{
+			Name:    "new_block_head_milliseconds",
+			Help:    "Captures latency for new block head in milliseconds",
+			Buckets: []float64{1, 5, 20, 100, 500, 1000},
+		},
+	)
 )

 // reportSlotMetrics reports slot related metrics.
--- a/beacon-chain/blockchain/process_block.go
+++ b/beacon-chain/blockchain/process_block.go
@@ -182,10 +182,14 @@ func (s *Service) onBlock(ctx context.Context, signed interfaces.SignedBeaconBlo
 		msg := fmt.Sprintf("could not read balances for state w/ justified checkpoint %#x", justified.Root)
 		return errors.Wrap(err, msg)
 	}
+
+	start := time.Now()
 	headRoot, err := s.cfg.ForkChoiceStore.Head(ctx, balances)
 	if err != nil {
 		log.WithError(err).Warn("Could not update head")
 	}
+	newBlockHeadElapsedTime.Observe(float64(time.Since(start).Milliseconds()))
+
 	if err := s.notifyEngineIfChangedHead(ctx, headRoot); err != nil {
 		return err
 	}
--- a/beacon-chain/blockchain/receive_attestation.go
+++ b/beacon-chain/blockchain/receive_attestation.go
@@ -147,17 +147,22 @@ func (s *Service) UpdateHead(ctx context.Context) error {
 	s.processAttestationsLock.Lock()
 	defer s.processAttestationsLock.Unlock()

+	start := time.Now()
 	s.processAttestations(ctx)
+	processAttsElapsedTime.Observe(float64(time.Since(start).Milliseconds()))

 	justified := s.ForkChoicer().JustifiedCheckpoint()
 	balances, err := s.justifiedBalances.get(ctx, justified.Root)
 	if err != nil {
 		return err
 	}
+	start = time.Now()
 	newHeadRoot, err := s.cfg.ForkChoiceStore.Head(ctx, balances)
 	if err != nil {
 		log.WithError(err).Warn("Resolving fork due to new attestation")
 	}
+	newAttHeadElapsedTime.Observe(float64(time.Since(start).Milliseconds()))
+
 	s.headLock.RLock()
 	if s.headRoot() != newHeadRoot {
 		log.WithFields(logrus.Fields{
--- a/beacon-chain/core/epoch/BUILD.bazel
+++ b/beacon-chain/core/epoch/BUILD.bazel
@@ -19,6 +19,8 @@ go_library(
        "//proto/prysm/v1alpha1:go_default_library",
        "//proto/prysm/v1alpha1/attestation:go_default_library",
        "@com_github_pkg_errors//:go_default_library",
+        "@com_github_prometheus_client_golang//prometheus:go_default_library",
+        "@com_github_prometheus_client_golang//prometheus/promauto:go_default_library",
    ],
 )

--- a/beacon-chain/core/epoch/epoch_processing.go
+++ b/beacon-chain/core/epoch/epoch_processing.go
@@ -10,6 +10,8 @@ import (
 	"sort"

 	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/core/helpers"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/core/time"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/core/validators"
@@ -21,6 +23,13 @@ import (
 	"github.com/prysmaticlabs/prysm/v3/proto/prysm/v1alpha1/attestation"
 )

+var (
+	activationQueueCount = promauto.NewGauge(prometheus.GaugeOpts{
+		Name: "activation_queue_count",
+		Help: "Number of validators in the activation queue",
+	})
+)
+
 // sortableIndices implements the Sort interface to sort newly activated validator indices
 // by activation epoch and by index number.
 type sortableIndices struct {
@@ -119,6 +128,7 @@ func ProcessRegistryUpdates(ctx context.Context, state state.BeaconState) (state
 			activationQ = append(activationQ, types.ValidatorIndex(idx))
 		}
 	}
+	activationQueueCount.Set(float64(len(activationQ)))

 	sort.Sort(sortableIndices{indices: activationQ, validators: vals})

--- a/beacon-chain/core/validators/BUILD.bazel
+++ b/beacon-chain/core/validators/BUILD.bazel
@@ -18,6 +18,8 @@ go_library(
        "//proto/prysm/v1alpha1:go_default_library",
        "//time/slots:go_default_library",
        "@com_github_pkg_errors//:go_default_library",
+        "@com_github_prometheus_client_golang//prometheus:go_default_library",
+        "@com_github_prometheus_client_golang//prometheus/promauto:go_default_library",
    ],
 )

--- a/beacon-chain/core/validators/validator.go
+++ b/beacon-chain/core/validators/validator.go
@@ -8,6 +8,8 @@ import (
 	"context"

 	"github.com/pkg/errors"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/core/helpers"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/core/time"
 	"github.com/prysmaticlabs/prysm/v3/beacon-chain/state"
@@ -18,6 +20,13 @@ import (
 	"github.com/prysmaticlabs/prysm/v3/time/slots"
 )

+var (
+	exitQueueCount = promauto.NewGauge(prometheus.GaugeOpts{
+		Name: "exit_queue_count",
+		Help: "Number of validators in the exit queue",
+	})
+)
+
 // InitiateValidatorExit takes in validator index and updates
 // validator with correct voluntary exit parameters.
 //
@@ -59,6 +68,7 @@ func InitiateValidatorExit(ctx context.Context, s state.BeaconState, idx types.V
 	if err != nil {
 		return nil, err
 	}
+	exitQueueCount.Set(float64(len(exitEpochs)))
 	exitEpochs = append(exitEpochs, helpers.ActivationExitEpoch(time.CurrentEpoch(s)))

 	// Obtain the exit queue epoch as the maximum number in the exit epochs array.
--- a/beacon-chain/execution/engine_client.go
+++ b/beacon-chain/execution/engine_client.go
@@ -537,22 +537,31 @@ func handleRPCError(err error) error {
 	}
 	switch e.ErrorCode() {
 	case -32700:
+		errParseCount.Inc()
 		return ErrParse
 	case -32600:
+		errInvalidRequestCount.Inc()
 		return ErrInvalidRequest
 	case -32601:
+		errMethodNotFoundCount.Inc()
 		return ErrMethodNotFound
 	case -32602:
+		errInvalidParamsCount.Inc()
 		return ErrInvalidParams
 	case -32603:
+		errInternalCount.Inc()
 		return ErrInternal
 	case -38001:
+		errUnknownPayloadCount.Inc()
 		return ErrUnknownPayload
 	case -38002:
+		errInvalidForkchoiceStateCount.Inc()
 		return ErrInvalidForkchoiceState
 	case -38003:
+		errInvalidPayloadAttributesCount.Inc()
 		return ErrInvalidPayloadAttributes
 	case -32000:
+		errServerErrorCount.Inc()
 		// Only -32000 status codes are data errors in the RPC specification.
 		errWithData, ok := err.(rpc.DataError)
 		if !ok {
--- a/beacon-chain/execution/metrics.go
+++ b/beacon-chain/execution/metrics.go
@@ -31,6 +31,42 @@ var (
 			Buckets: []float64{25, 50, 100, 200, 500, 1000, 2000, 4000},
 		},
 	)
+	errParseCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_parse_error_count",
+		Help: "The number of errors that occurred while parsing execution payload",
+	})
+	errInvalidRequestCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_invalid_request_count",
+		Help: "The number of errors that occurred due to invalid request",
+	})
+	errMethodNotFoundCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_method_not_found_count",
+		Help: "The number of errors that occurred due to method not found",
+	})
+	errInvalidParamsCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_invalid_params_count",
+		Help: "The number of errors that occurred due to invalid params",
+	})
+	errInternalCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_internal_error_count",
+		Help: "The number of errors that occurred due to internal error",
+	})
+	errUnknownPayloadCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_unknown_payload_count",
+		Help: "The number of errors that occurred due to unknown payload",
+	})
+	errInvalidForkchoiceStateCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_invalid_forkchoice_state_count",
+		Help: "The number of errors that occurred due to invalid forkchoice state",
+	})
+	errInvalidPayloadAttributesCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_invalid_payload_attributes_count",
+		Help: "The number of errors that occurred due to invalid payload attributes",
+	})
+	errServerErrorCount = promauto.NewCounter(prometheus.CounterOpts{
+		Name: "execution_server_error_count",
+		Help: "The number of errors that occurred due to server error",
+	})
 	reconstructedExecutionPayloadCount = promauto.NewCounter(prometheus.CounterOpts{
 		Name: "reconstructed_execution_payload_count",
 		Help: "Count the number of execution payloads that are reconstructed using JSON-RPC from payload headers",