From c638e114db7177830aa9d7f2d195addc037da726 Mon Sep 17 00:00:00 2001 From: terencechain Date: Wed, 31 Aug 2022 15:05:50 -0700 Subject: [PATCH] Add new metrics (#11374) * Better batch block warning * New metrics * Revert "Better batch block warning" This reverts commit e21fcfcebec3a7a15f39c6336de2aba58f87c625. * More metrics * Add activation and exit queues * Gaz --- beacon-chain/blockchain/execution_engine.go | 2 +- beacon-chain/blockchain/log.go | 1 + beacon-chain/blockchain/metrics.go | 29 +++++++++++++++ beacon-chain/blockchain/process_block.go | 4 +++ .../blockchain/receive_attestation.go | 5 +++ beacon-chain/core/epoch/BUILD.bazel | 2 ++ beacon-chain/core/epoch/epoch_processing.go | 10 ++++++ beacon-chain/core/validators/BUILD.bazel | 2 ++ beacon-chain/core/validators/validator.go | 10 ++++++ beacon-chain/execution/engine_client.go | 9 +++++ beacon-chain/execution/metrics.go | 36 +++++++++++++++++++ 11 files changed, 109 insertions(+), 1 deletion(-) diff --git a/beacon-chain/blockchain/execution_engine.go b/beacon-chain/blockchain/execution_engine.go index 1581b2eb48..e1d7f42c5e 100644 --- a/beacon-chain/blockchain/execution_engine.go +++ b/beacon-chain/blockchain/execution_engine.go @@ -89,7 +89,7 @@ func (s *Service) notifyForkchoiceUpdate(ctx context.Context, arg *notifyForkcho } return payloadID, nil case execution.ErrInvalidPayloadStatus: - newPayloadInvalidNodeCount.Inc() + forkchoiceUpdatedInvalidNodeCount.Inc() headRoot := arg.headRoot if len(lastValidHash) == 0 { lastValidHash = defaultLatestValidHash diff --git a/beacon-chain/blockchain/log.go b/beacon-chain/blockchain/log.go index 760f1b2c92..5ea346e673 100644 --- a/beacon-chain/blockchain/log.go +++ b/beacon-chain/blockchain/log.go @@ -58,6 +58,7 @@ func logStateTransitionData(b interfaces.BeaconBlock) error { return err default: log = log.WithField("txCount", len(txs)) + txsPerSlotCount.Set(float64(len(txs))) } } diff --git a/beacon-chain/blockchain/metrics.go b/beacon-chain/blockchain/metrics.go index ad8f9f7f9e..edd492bf35 100644 --- a/beacon-chain/blockchain/metrics.go +++ b/beacon-chain/blockchain/metrics.go @@ -158,10 +158,39 @@ var ( Name: "forkchoice_updated_optimistic_node_count", Help: "Count the number of optimistic nodes after forkchoiceUpdated EE call", }) + forkchoiceUpdatedInvalidNodeCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "forkchoice_updated_invalid_node_count", + Help: "Count the number of invalid nodes after forkchoiceUpdated EE call", + }) + txsPerSlotCount = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "txs_per_slot_count", + Help: "Count the number of txs per slot", + }) missedPayloadIDFilledCount = promauto.NewCounter(prometheus.CounterOpts{ Name: "missed_payload_id_filled_count", Help: "", }) + processAttsElapsedTime = promauto.NewHistogram( + prometheus.HistogramOpts{ + Name: "process_attestations_milliseconds", + Help: "Captures latency for process attestations (forkchoice) in milliseconds", + Buckets: []float64{1, 5, 20, 100, 500, 1000}, + }, + ) + newAttHeadElapsedTime = promauto.NewHistogram( + prometheus.HistogramOpts{ + Name: "new_att_head_milliseconds", + Help: "Captures latency for new attestation head in milliseconds", + Buckets: []float64{1, 5, 20, 100, 500, 1000}, + }, + ) + newBlockHeadElapsedTime = promauto.NewHistogram( + prometheus.HistogramOpts{ + Name: "new_block_head_milliseconds", + Help: "Captures latency for new block head in milliseconds", + Buckets: []float64{1, 5, 20, 100, 500, 1000}, + }, + ) ) // reportSlotMetrics reports slot related metrics. diff --git a/beacon-chain/blockchain/process_block.go b/beacon-chain/blockchain/process_block.go index 85b159add2..ea77da369f 100644 --- a/beacon-chain/blockchain/process_block.go +++ b/beacon-chain/blockchain/process_block.go @@ -182,10 +182,14 @@ func (s *Service) onBlock(ctx context.Context, signed interfaces.SignedBeaconBlo msg := fmt.Sprintf("could not read balances for state w/ justified checkpoint %#x", justified.Root) return errors.Wrap(err, msg) } + + start := time.Now() headRoot, err := s.cfg.ForkChoiceStore.Head(ctx, balances) if err != nil { log.WithError(err).Warn("Could not update head") } + newBlockHeadElapsedTime.Observe(float64(time.Since(start).Milliseconds())) + if err := s.notifyEngineIfChangedHead(ctx, headRoot); err != nil { return err } diff --git a/beacon-chain/blockchain/receive_attestation.go b/beacon-chain/blockchain/receive_attestation.go index a5953ccb0e..bee7b7dd55 100644 --- a/beacon-chain/blockchain/receive_attestation.go +++ b/beacon-chain/blockchain/receive_attestation.go @@ -147,17 +147,22 @@ func (s *Service) UpdateHead(ctx context.Context) error { s.processAttestationsLock.Lock() defer s.processAttestationsLock.Unlock() + start := time.Now() s.processAttestations(ctx) + processAttsElapsedTime.Observe(float64(time.Since(start).Milliseconds())) justified := s.ForkChoicer().JustifiedCheckpoint() balances, err := s.justifiedBalances.get(ctx, justified.Root) if err != nil { return err } + start = time.Now() newHeadRoot, err := s.cfg.ForkChoiceStore.Head(ctx, balances) if err != nil { log.WithError(err).Warn("Resolving fork due to new attestation") } + newAttHeadElapsedTime.Observe(float64(time.Since(start).Milliseconds())) + s.headLock.RLock() if s.headRoot() != newHeadRoot { log.WithFields(logrus.Fields{ diff --git a/beacon-chain/core/epoch/BUILD.bazel b/beacon-chain/core/epoch/BUILD.bazel index da950d9da3..1b1fc26027 100644 --- a/beacon-chain/core/epoch/BUILD.bazel +++ b/beacon-chain/core/epoch/BUILD.bazel @@ -19,6 +19,8 @@ go_library( "//proto/prysm/v1alpha1:go_default_library", "//proto/prysm/v1alpha1/attestation:go_default_library", "@com_github_pkg_errors//:go_default_library", + "@com_github_prometheus_client_golang//prometheus:go_default_library", + "@com_github_prometheus_client_golang//prometheus/promauto:go_default_library", ], ) diff --git a/beacon-chain/core/epoch/epoch_processing.go b/beacon-chain/core/epoch/epoch_processing.go index 828e2795d4..7c9616e925 100644 --- a/beacon-chain/core/epoch/epoch_processing.go +++ b/beacon-chain/core/epoch/epoch_processing.go @@ -10,6 +10,8 @@ import ( "sort" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prysmaticlabs/prysm/v3/beacon-chain/core/helpers" "github.com/prysmaticlabs/prysm/v3/beacon-chain/core/time" "github.com/prysmaticlabs/prysm/v3/beacon-chain/core/validators" @@ -21,6 +23,13 @@ import ( "github.com/prysmaticlabs/prysm/v3/proto/prysm/v1alpha1/attestation" ) +var ( + activationQueueCount = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "activation_queue_count", + Help: "Number of validators in the activation queue", + }) +) + // sortableIndices implements the Sort interface to sort newly activated validator indices // by activation epoch and by index number. type sortableIndices struct { @@ -119,6 +128,7 @@ func ProcessRegistryUpdates(ctx context.Context, state state.BeaconState) (state activationQ = append(activationQ, types.ValidatorIndex(idx)) } } + activationQueueCount.Set(float64(len(activationQ))) sort.Sort(sortableIndices{indices: activationQ, validators: vals}) diff --git a/beacon-chain/core/validators/BUILD.bazel b/beacon-chain/core/validators/BUILD.bazel index 31b0c50703..f704629fe7 100644 --- a/beacon-chain/core/validators/BUILD.bazel +++ b/beacon-chain/core/validators/BUILD.bazel @@ -18,6 +18,8 @@ go_library( "//proto/prysm/v1alpha1:go_default_library", "//time/slots:go_default_library", "@com_github_pkg_errors//:go_default_library", + "@com_github_prometheus_client_golang//prometheus:go_default_library", + "@com_github_prometheus_client_golang//prometheus/promauto:go_default_library", ], ) diff --git a/beacon-chain/core/validators/validator.go b/beacon-chain/core/validators/validator.go index d24d17637a..08f34c99e6 100644 --- a/beacon-chain/core/validators/validator.go +++ b/beacon-chain/core/validators/validator.go @@ -8,6 +8,8 @@ import ( "context" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prysmaticlabs/prysm/v3/beacon-chain/core/helpers" "github.com/prysmaticlabs/prysm/v3/beacon-chain/core/time" "github.com/prysmaticlabs/prysm/v3/beacon-chain/state" @@ -18,6 +20,13 @@ import ( "github.com/prysmaticlabs/prysm/v3/time/slots" ) +var ( + exitQueueCount = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "exit_queue_count", + Help: "Number of validators in the exit queue", + }) +) + // InitiateValidatorExit takes in validator index and updates // validator with correct voluntary exit parameters. // @@ -59,6 +68,7 @@ func InitiateValidatorExit(ctx context.Context, s state.BeaconState, idx types.V if err != nil { return nil, err } + exitQueueCount.Set(float64(len(exitEpochs))) exitEpochs = append(exitEpochs, helpers.ActivationExitEpoch(time.CurrentEpoch(s))) // Obtain the exit queue epoch as the maximum number in the exit epochs array. diff --git a/beacon-chain/execution/engine_client.go b/beacon-chain/execution/engine_client.go index c5c375fe5f..5389ade2ff 100644 --- a/beacon-chain/execution/engine_client.go +++ b/beacon-chain/execution/engine_client.go @@ -537,22 +537,31 @@ func handleRPCError(err error) error { } switch e.ErrorCode() { case -32700: + errParseCount.Inc() return ErrParse case -32600: + errInvalidRequestCount.Inc() return ErrInvalidRequest case -32601: + errMethodNotFoundCount.Inc() return ErrMethodNotFound case -32602: + errInvalidParamsCount.Inc() return ErrInvalidParams case -32603: + errInternalCount.Inc() return ErrInternal case -38001: + errUnknownPayloadCount.Inc() return ErrUnknownPayload case -38002: + errInvalidForkchoiceStateCount.Inc() return ErrInvalidForkchoiceState case -38003: + errInvalidPayloadAttributesCount.Inc() return ErrInvalidPayloadAttributes case -32000: + errServerErrorCount.Inc() // Only -32000 status codes are data errors in the RPC specification. errWithData, ok := err.(rpc.DataError) if !ok { diff --git a/beacon-chain/execution/metrics.go b/beacon-chain/execution/metrics.go index e14ddb5d38..218318e5a5 100644 --- a/beacon-chain/execution/metrics.go +++ b/beacon-chain/execution/metrics.go @@ -31,6 +31,42 @@ var ( Buckets: []float64{25, 50, 100, 200, 500, 1000, 2000, 4000}, }, ) + errParseCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_parse_error_count", + Help: "The number of errors that occurred while parsing execution payload", + }) + errInvalidRequestCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_invalid_request_count", + Help: "The number of errors that occurred due to invalid request", + }) + errMethodNotFoundCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_method_not_found_count", + Help: "The number of errors that occurred due to method not found", + }) + errInvalidParamsCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_invalid_params_count", + Help: "The number of errors that occurred due to invalid params", + }) + errInternalCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_internal_error_count", + Help: "The number of errors that occurred due to internal error", + }) + errUnknownPayloadCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_unknown_payload_count", + Help: "The number of errors that occurred due to unknown payload", + }) + errInvalidForkchoiceStateCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_invalid_forkchoice_state_count", + Help: "The number of errors that occurred due to invalid forkchoice state", + }) + errInvalidPayloadAttributesCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_invalid_payload_attributes_count", + Help: "The number of errors that occurred due to invalid payload attributes", + }) + errServerErrorCount = promauto.NewCounter(prometheus.CounterOpts{ + Name: "execution_server_error_count", + Help: "The number of errors that occurred due to server error", + }) reconstructedExecutionPayloadCount = promauto.NewCounter(prometheus.CounterOpts{ Name: "reconstructed_execution_payload_count", Help: "Count the number of execution payloads that are reconstructed using JSON-RPC from payload headers",