Mirror of https://github.com/OffchainLabs/prysm.git (synced 2026-01-08 21:08:10 -05:00)
#### This PR sets the foundation for the new logging features.

---

The goals of this big PR are the following:

1. Add a log.go file to every package: [_commit_](54f6396d4c)
   - A bash script adds the log.go file to every package that imports logrus, except the excluded packages configured at the top of the script.
   - The log.go file declares a `log` variable and sets a field called `package` to the full path of that package (see the sketch after this description).
   - I have tried to fix every error/problem that came from mass-generating this file (duplicate declarations, different prefix names, etc.).
   - Some packages already had a log.go file, sometimes with helper functions in it. I moved those helpers into a `log_helpers.go` file within each package.
2. Create a CI rule which verifies that: [_commit_](b799c3a0ef)
   - every package which imports logrus also has a log.go file, except the excluded packages;
   - the `package` field of each log.go variable has the correct path (to detect when a package is moved or renamed).
   - I pushed a commit with a manually changed log.go file to trigger the CI check failure, and it worked.
3. Alter the logging system to read the prefix for every log from this `package` field when outputting: [_commit_](b0c7f1146c)
   - Some packages have/want/need a different log prefix than their package name (like `kv`). This is solved by keeping a map of package paths to prefix names somewhere.

---

**Some notes:**

- Please review everything carefully.
- I created the `prefixReplacement` map and populated it with the data I deemed necessary. Please check it and complain if something doesn't make sense or is missing. The list of all packages that previously used a prefix different from their package name is attached at the bottom, and a sketch of how the map is consulted follows those lists.
- I chose to exclude some packages from this whole process. They will either not log anything, log without a prefix, or log using their previously defined prefix. See the list of exclusions at the bottom.
- I fixed all the tests that failed because of this change. They were failing because they expected the old prefix in the generated logs; I changed them to expect the new `package` field instead. This might not be a great solution. Ideally we would remove these assertions so the tests only check the relevant fields in the logs, but that is a problem for another day.
- Please run the node with this config, and mention if you see anything weird in the logs (use different verbosities).
- The CI workflow uses a script, `hack/check-logs.sh`, which basically runs `hack/gen-logs.sh` and checks that the git diff is empty. This means that if you run check-logs.sh locally, it will not actually _check_ anything; it will just regenerate the log.go files and fix any mistakes. This might be confusing. Please suggest solutions if you think it's a problem.
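For concreteness, here is a minimal sketch of what a generated log.go could look like for the `pruner` package shown at the bottom of this page. The exact shape is an assumption; the PR only states that the generated file declares a `log` variable and sets a `package` field to the package's full path:

```go
// log.go: illustrative sketch of a file generated by hack/gen-logs.sh.
package pruner

import "github.com/sirupsen/logrus"

// log carries a "package" field with the package's full path; the logging
// layer later derives the display prefix from this field.
var log = logrus.WithField("package", "beacon-chain/db/pruner")
```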
---

**A list of packages that used a different prefix than their package name for their logs:**

- beacon-chain/cache/depositsnapshot/ — package depositsnapshot, prefix "cache"
- beacon-chain/core/transition/log.go — package transition, prefix "state"
- beacon-chain/db/kv/log.go — package kv, prefix "db"
- beacon-chain/db/slasherkv/log.go — package slasherkv, prefix "slasherdb"
- beacon-chain/db/pruner/pruner.go — package pruner, prefix "db-pruner"
- beacon-chain/light-client/log.go — package light_client, prefix "light-client"
- beacon-chain/operations/attestations/log.go — package attestations, prefix "pool/attestations"
- beacon-chain/operations/slashings/log.go — package slashings, prefix "pool/slashings"
- beacon-chain/rpc/core/log.go — package core, prefix "rpc/core"
- beacon-chain/rpc/eth/beacon/log.go — package beacon, prefix "rpc/beaconv1"
- beacon-chain/rpc/eth/validator/log.go — package validator, prefix "beacon-api"
- beacon-chain/rpc/prysm/v1alpha1/beacon/log.go — package beacon, prefix "rpc"
- beacon-chain/rpc/prysm/v1alpha1/validator/log.go — package validator, prefix "rpc/validator"
- beacon-chain/state/stategen/log.go — package stategen, prefix "state-gen"
- beacon-chain/sync/checkpoint/log.go — package checkpoint, prefix "checkpoint-sync"
- beacon-chain/sync/initial-sync/log.go — package initialsync, prefix "initial-sync"
- cmd/prysmctl/p2p/log.go — package p2p, prefix "prysmctl-p2p"
- config/features/log.go — package features, prefix "flags"
- io/file/log.go — package file, prefix "fileutil"
- proto/prysm/v1alpha1/log.go — package eth, prefix "protobuf"
- validator/client/beacon-api/log.go — package beacon_api, prefix "beacon-api"
- validator/db/kv/log.go — package kv, prefix "db"
- validator/db/filesystem/db.go — package filesystem, prefix "db"
- validator/keymanager/derived/log.go — package derived, prefix "derived-keymanager"
- validator/keymanager/local/log.go — package local, prefix "local-keymanager"
- validator/keymanager/remote-web3signer/log.go — package remote_web3signer, prefix "remote-keymanager"
- validator/keymanager/remote-web3signer/internal/log.go — package internal, prefix "remote-web3signer-internal"
- beacon-chain/forkchoice/doubly... — prefix "forkchoice-doublylinkedtree"

**List of excluded directories (their subdirectories are also excluded):**

```
EXCLUDED_PATH_PREFIXES=(
  "testing"
  "validator/client/testutil"
  "beacon-chain/p2p/testing"
  "beacon-chain/rpc/eth/config"
  "beacon-chain/rpc/prysm/v1alpha1/debug"
  "tools"
  "runtime"
  "monitoring"
  "io"
  "cmd"
  ".well-known"
  "changelog"
  "hack"
  "specrefs"
  "third_party"
  "bazel-out"
  "bazel-bin"
  "bazel-prysm"
  "bazel-testlogs"
  "build"
  ".github"
  ".jj"
  ".idea"
  ".vscode"
)
```
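As a sketch of how the `prefixReplacement` map might be consulted when the display prefix is derived from the `package` field: the map entries below are examples taken from the list above, while the package name, the function name, and the fallback behavior are assumptions made for illustration.

```go
package logging

import "strings"

// prefixReplacement maps full package paths to the prefix that should be shown
// instead of the last path element. Entries here are illustrative examples
// taken from the list above; the real map in the PR may differ.
var prefixReplacement = map[string]string{
	"beacon-chain/db/kv":           "db",
	"beacon-chain/db/slasherkv":    "slasherdb",
	"beacon-chain/state/stategen":  "state-gen",
	"validator/keymanager/derived": "derived-keymanager",
}

// prefixFor resolves the prefix for a log entry: use the replacement if one is
// configured for the package path, otherwise fall back to the last path element.
func prefixFor(pkgPath string) string {
	if p, ok := prefixReplacement[pkgPath]; ok {
		return p
	}
	if i := strings.LastIndex(pkgPath, "/"); i >= 0 {
		return pkgPath[i+1:]
	}
	return pkgPath
}
```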
beacon-chain/db/pruner/pruner.go · 252 lines · 7.5 KiB · Go
package pruner

import (
	"context"
	"time"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/db"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/iface"
	"github.com/OffchainLabs/prysm/v7/config/params"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
	"github.com/OffchainLabs/prysm/v7/time/slots"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

const (
	// defaultPrunableBatchSize is the number of slots that can be pruned at once.
	defaultPrunableBatchSize = 32
	// defaultPruningWindow is the duration of one pruning window.
	defaultPruningWindow = time.Second * 3
	// defaultNumBatchesToPrune is the number of batches to prune in one pruning window.
	defaultNumBatchesToPrune = 15
)

// custodyUpdater is a tiny interface that the p2p service implements; kept here to avoid
// importing the p2p package and creating a cycle.
type custodyUpdater interface {
	UpdateEarliestAvailableSlot(earliestAvailableSlot primitives.Slot) error
}

// ServiceOption configures the pruner Service.
type ServiceOption func(*Service)

// WithRetentionPeriod allows the user to specify a different data retention period than the spec default.
// The retention period is specified in epochs, and must be >= MIN_EPOCHS_FOR_BLOCK_REQUESTS.
func WithRetentionPeriod(retentionEpochs primitives.Epoch) ServiceOption {
	return func(s *Service) {
		defaultRetentionEpochs := primitives.Epoch(params.BeaconConfig().MinEpochsForBlockRequests) + 1
		if retentionEpochs < defaultRetentionEpochs {
			log.WithField("userEpochs", retentionEpochs).
				WithField("minRequired", defaultRetentionEpochs).
				Warn("Retention period too low, ignoring and using minimum required value")
			retentionEpochs = defaultRetentionEpochs
		}

		s.ps = pruneStartSlotFunc(retentionEpochs)
	}
}

// WithSlotTicker overrides the slot ticker used by the pruner.
func WithSlotTicker(slotTicker slots.Ticker) ServiceOption {
	return func(s *Service) {
		s.slotTicker = slotTicker
	}
}

// Service defines a service that prunes the beacon chain DB based on MIN_EPOCHS_FOR_BLOCK_REQUESTS.
type Service struct {
	ctx            context.Context
	db             db.Database
	ps             func(current primitives.Slot) primitives.Slot
	prunedUpto     primitives.Slot
	done           chan struct{}
	slotTicker     slots.Ticker
	backfillWaiter func() error
	initSyncWaiter func() error
	custody        custodyUpdater
}

// New creates a pruner service with the default retention period and applies any options.
func New(ctx context.Context, db iface.Database, genesisTime time.Time, initSyncWaiter, backfillWaiter func() error, custody custodyUpdater, opts ...ServiceOption) (*Service, error) {
	if custody == nil {
		return nil, errors.New("custody updater is required for pruner but was not provided")
	}

	p := &Service{
		ctx:            ctx,
		db:             db,
		ps:             pruneStartSlotFunc(primitives.Epoch(params.BeaconConfig().MinEpochsForBlockRequests) + 1), // Default retention epochs is MIN_EPOCHS_FOR_BLOCK_REQUESTS + 1 from the current slot.
		done:           make(chan struct{}),
		slotTicker:     slots.NewSlotTicker(slots.UnsafeStartTime(genesisTime, 0), params.BeaconConfig().SecondsPerSlot),
		initSyncWaiter: initSyncWaiter,
		backfillWaiter: backfillWaiter,
		custody:        custody,
	}

	for _, o := range opts {
		o(p)
	}

	return p, nil
}

// Start launches the pruning loop.
func (p *Service) Start() {
	log.Info("Starting Beacon DB pruner service")
	p.run()
}

// Stop signals the pruning loop to exit.
func (p *Service) Stop() error {
	log.Info("Stopping Beacon DB pruner service")
	close(p.done)
	return nil
}

// Status always reports a healthy service.
func (p *Service) Status() error {
	return nil
}

func (p *Service) run() {
	if p.initSyncWaiter != nil {
		log.Info("Waiting for initial sync service to complete before starting pruner")
		if err := p.initSyncWaiter(); err != nil {
			log.WithError(err).Error("Failed to start database pruner, error waiting for initial sync completion")
			return
		}
	}
	if p.backfillWaiter != nil {
		log.Info("Waiting for backfill service to complete before starting pruner")
		if err := p.backfillWaiter(); err != nil {
			log.WithError(err).Error("Failed to start database pruner, error waiting for backfill completion")
			return
		}
	}

	defer p.slotTicker.Done()

	for {
		select {
		case <-p.ctx.Done():
			log.WithField("prunedUpto", p.prunedUpto).Debug("Stopping Beacon DB pruner service")
			return
		case <-p.done:
			log.WithField("prunedUpto", p.prunedUpto).Debug("Stopping Beacon DB pruner service")
			return
		case slot := <-p.slotTicker.C():
			// Prune at the middle of every epoch since we do a lot of things around epoch boundaries.
			if slots.SinceEpochStarts(slot) != (params.BeaconConfig().SlotsPerEpoch / 2) {
				continue
			}

			if err := p.prune(slot); err != nil {
				log.WithError(err).Error("Failed to prune database")
			}
		}
	}
}

// prune deletes historical chain data up to and including the computed prune slot.
func (p *Service) prune(slot primitives.Slot) error {
	// Prune everything up to this slot (inclusive).
	pruneUpto := p.ps(slot)

	// Can't prune beyond genesis.
	if pruneUpto == 0 {
		return nil
	}

	// Skip if already pruned up to this slot.
	if pruneUpto <= p.prunedUpto {
		return nil
	}

	log.WithFields(logrus.Fields{
		"pruneUpto": pruneUpto,
	}).Debug("Pruning chain data")

	tt := time.Now()
	numBatches, err := p.pruneBatches(pruneUpto)
	if err != nil {
		return errors.Wrap(err, "failed to prune batches")
	}

	earliestAvailableSlot := pruneUpto + 1

	// Update pruning checkpoint.
	p.prunedUpto = pruneUpto

	// Update the earliest available slot after pruning.
	if err := p.updateEarliestAvailableSlot(earliestAvailableSlot); err != nil {
		return errors.Wrap(err, "update earliest available slot")
	}

	log.WithFields(logrus.Fields{
		"prunedUpto":            pruneUpto,
		"earliestAvailableSlot": earliestAvailableSlot,
		"duration":              time.Since(tt),
		"currentSlot":           slot,
		"batchSize":             defaultPrunableBatchSize,
		"numBatches":            numBatches,
	}).Debug("Successfully pruned chain data")

	return nil
}

// updateEarliestAvailableSlot updates the earliest available slot via the injected custody updater
// and also persists it to the database.
func (p *Service) updateEarliestAvailableSlot(earliestAvailableSlot primitives.Slot) error {
	if !params.FuluEnabled() {
		return nil
	}

	// Update the p2p in-memory state.
	if err := p.custody.UpdateEarliestAvailableSlot(earliestAvailableSlot); err != nil {
		return errors.Wrapf(err, "update earliest available slot after pruning to %d", earliestAvailableSlot)
	}

	// Persist to database to ensure it survives restarts.
	if err := p.db.UpdateEarliestAvailableSlot(p.ctx, earliestAvailableSlot); err != nil {
		return errors.Wrapf(err, "update earliest available slot in database for slot %d", earliestAvailableSlot)
	}

	return nil
}

// pruneBatches deletes data in batches until everything up to pruneUpto is gone or the
// pruning window expires, and returns the number of batches deleted.
func (p *Service) pruneBatches(pruneUpto primitives.Slot) (int, error) {
	ctx, cancel := context.WithTimeout(p.ctx, defaultPruningWindow)
	defer cancel()

	numBatches := 0
	for {
		select {
		case <-ctx.Done():
			return numBatches, nil
		default:
			for range defaultNumBatchesToPrune {
				slotsDeleted, err := p.db.DeleteHistoricalDataBeforeSlot(ctx, pruneUpto, defaultPrunableBatchSize)
				if err != nil {
					return 0, errors.Wrapf(err, "could not delete up to slot %d", pruneUpto)
				}

				// Return if there's nothing to delete.
				if slotsDeleted == 0 {
					return numBatches, nil
				}

				numBatches++
			}
		}
	}
}

// pruneStartSlotFunc returns a function that, given the current slot, determines the slot
// up to which to prune.
func pruneStartSlotFunc(retentionEpochs primitives.Epoch) func(primitives.Slot) primitives.Slot {
	return func(current primitives.Slot) primitives.Slot {
		if retentionEpochs > slots.MaxSafeEpoch() {
			retentionEpochs = slots.MaxSafeEpoch()
		}
		offset := slots.UnsafeEpochStart(retentionEpochs)
		if offset >= current {
			return 0
		}
		return current - offset
	}
}
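For reference, a hypothetical wiring sketch (not part of the file above) showing how a caller might construct and start this pruner with a non-default retention period. The import path, the helper names, and the way the dependencies are obtained are assumptions.

```go
package example

import (
	"context"
	"time"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/iface"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/pruner"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
)

// custodyUpdater mirrors the small interface the pruner expects; in the real
// node this is satisfied by the p2p service.
type custodyUpdater interface {
	UpdateEarliestAvailableSlot(earliestAvailableSlot primitives.Slot) error
}

// startPruner wires up the pruner service; the database, waiters, and custody
// updater are assumed to come from the surrounding node services.
func startPruner(ctx context.Context, d iface.Database, genesis time.Time,
	initSyncWaiter, backfillWaiter func() error, custody custodyUpdater) (*pruner.Service, error) {
	svc, err := pruner.New(ctx, d, genesis, initSyncWaiter, backfillWaiter, custody,
		// Hypothetical retention of 40000 epochs; anything below the spec
		// minimum is bumped up with a warning by WithRetentionPeriod.
		pruner.WithRetentionPeriod(primitives.Epoch(40000)),
	)
	if err != nil {
		return nil, err
	}
	// Start blocks on the init-sync/backfill waiters and then loops on the slot
	// ticker, so it is launched in its own goroutine.
	go svc.Start()
	return svc, nil
}
```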