prysm/beacon-chain/db/pruner/pruner.go
Bastin 6b5ba5ad01 Switch logging from using prefixes to the new package path format (#16059)
#### This PR sets the foundation for the new logging features.

---

The goals of this big PR are the following:
1. Add a log.go file to every package:
[_commit_](54f6396d4c)
- I wrote a bash script that adds a log.go file to every package that
imports logrus, except for the excluded packages configured at the top of
the script.
- The log.go file creates a `log` variable and sets a field called
`package` to the full path of that package (a sketch of what such a
generated file might look like follows this list item).
- I have tried to fix every error/problem that came from mass generating
this file (duplicate declarations, different prefix names, etc.).
- Some packages already had a log.go file, and some of those also
contained helper functions. I've moved those helpers into a
`log_helpers.go` file within each package.
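
For illustration only, here is a minimal sketch of what one of the generated files might look like, assuming the `package` field holds the repo-relative package path. The header comment and exact contents are assumptions; the real files are produced by `hack/gen-logs.sh`.

```go
// Hypothetical example of a generated log.go; the real contents come from hack/gen-logs.sh.
package pruner

import "github.com/sirupsen/logrus"

// log is the package-level logger. The "package" field carries the full package
// path so the logging layer can later derive a display prefix from it.
var log = logrus.WithField("package", "beacon-chain/db/pruner")
```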

2. Create a CI rule which verifies that:
[_commit_](b799c3a0ef)
- every package which imports logrus also has a log.go file, except for
the excluded packages.
- the `package` field in each log.go has the correct path (to detect when
we move a package or change its name). A rough sketch of this check is
shown after this list item.
- I pushed a commit with a manually changed log.go file to trigger the CI
check failure, and it worked.
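
The actual check is a shell script (see the note about `hack/check-logs.sh` below). Purely as an illustration of the path-correctness half of this rule, a standalone Go sketch might look like the following; the regexp and the assumption that the field value is the repo-relative directory are mine, not the PR's.

```go
// Illustrative only: walks the tree, finds every log.go, and checks that its
// "package" field matches the directory it lives in. The real CI check instead
// re-runs hack/gen-logs.sh and asserts that the git diff is empty.
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"regexp"
)

// packageField matches the assumed `WithField("package", "<path>")` call.
var packageField = regexp.MustCompile(`"package",\s*"([^"]+)"`)

func main() {
	failed := false
	_ = filepath.WalkDir(".", func(path string, d os.DirEntry, err error) error {
		if err != nil || d.IsDir() || d.Name() != "log.go" {
			return err
		}
		data, err := os.ReadFile(path)
		if err != nil {
			return err
		}
		want := filepath.ToSlash(filepath.Dir(path)) // repo-relative package path
		if m := packageField.FindSubmatch(data); m == nil || string(m[1]) != want {
			fmt.Printf("%s: package field does not match %q\n", path, want)
			failed = true
		}
		return nil
	})
	if failed {
		os.Exit(1)
	}
}
```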

3. Alter the logging system to read the prefix from this `package` field
for every log while outputting:
[_commit_](b0c7f1146c)
- Some packages have/want/need a different log prefix than their package
name (like `kv`). This can be solved by keeping a map of package paths to
prefix names somewhere; a sketch of this idea is shown after this list
item.
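
As a rough sketch of that idea (not this PR's actual implementation), a logrus formatter wrapper could rewrite the `package` field into a display prefix, falling back to the last path element when there is no override. Only the `prefixReplacement` name and the example entries come from this PR; the type and wiring below are illustrative.

```go
// Illustrative sketch of prefix derivation; the real wiring in this PR may differ.
package logging

import (
	"strings"

	"github.com/sirupsen/logrus"
)

// prefixReplacement maps package paths to prefixes that differ from the last
// path element (example entries taken from the list at the bottom).
var prefixReplacement = map[string]string{
	"beacon-chain/db/kv":          "db",
	"beacon-chain/db/pruner":      "db-pruner",
	"beacon-chain/state/stategen": "state-gen",
}

// prefixFormatter rewrites the "package" field into a short "prefix" field
// before handing the entry to the wrapped formatter.
type prefixFormatter struct {
	inner logrus.Formatter
}

func (f prefixFormatter) Format(e *logrus.Entry) ([]byte, error) {
	if pkg, ok := e.Data["package"].(string); ok {
		prefix := pkg[strings.LastIndex(pkg, "/")+1:] // default: package name
		if replacement, ok := prefixReplacement[pkg]; ok {
			prefix = replacement
		}
		e.Data["prefix"] = prefix
		delete(e.Data, "package")
	}
	return f.inner.Format(e)
}
```

It could then be installed with something like `logrus.SetFormatter(prefixFormatter{inner: &logrus.TextFormatter{}})`, assuming a text formatter as the inner formatter.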
    
    
---

**Some notes:**
- Please review everything carefully.
- I created the `prefixReplacement` map and populated it with the data I
deemed necessary. Please check it and complain if something doesn't make
sense or is missing. At the bottom I attached the list of all the
packages that used to use a name different from their package name as
their prefix.
- I have chosen to exclude some packages from this whole process. They
will either not log anything, log without a prefix, or log using their
previously defined prefix. See the list of exclusions at the bottom.
- I fixed all the tests that failed because of this change. They were
failing because they expected the old prefix to be in the generated
logs; I changed them to expect the new `package` field instead. This
might not be a great solution. Ideally we would remove this from the
tests so they only check the relevant fields in the logs, but that is a
problem for another day.
- Please run the node with this change and mention if you see something
weird in the logs (try different verbosities).
- The CI workflow uses a script, `hack/check-logs.sh`, that basically
runs `hack/gen-logs.sh` and checks that the resulting git diff is empty.
This means that if one runs this script locally, it will not actually
_check_ anything; it will just regenerate the log.go files and fix any
mistakes. This might be confusing. Please suggest solutions if you think
it's a problem.

---

**A list of packages that used a different prefix than their package
names for their logs:**

- beacon-chain/cache/depositsnapshot/ — package depositsnapshot, prefix "cache"
- beacon-chain/core/transition/log.go — package transition, prefix "state"
- beacon-chain/db/kv/log.go — package kv, prefix "db"
- beacon-chain/db/slasherkv/log.go — package slasherkv, prefix "slasherdb"
- beacon-chain/db/pruner/pruner.go — package pruner, prefix "db-pruner"
- beacon-chain/light-client/log.go — package light_client, prefix "light-client"
- beacon-chain/operations/attestations/log.go — package attestations, prefix "pool/attestations"
- beacon-chain/operations/slashings/log.go — package slashings, prefix "pool/slashings"
- beacon-chain/rpc/core/log.go — package core, prefix "rpc/core"
- beacon-chain/rpc/eth/beacon/log.go — package beacon, prefix "rpc/beaconv1"
- beacon-chain/rpc/eth/validator/log.go — package validator, prefix "beacon-api"
- beacon-chain/rpc/prysm/v1alpha1/beacon/log.go — package beacon, prefix "rpc"
- beacon-chain/rpc/prysm/v1alpha1/validator/log.go — package validator, prefix "rpc/validator"
- beacon-chain/state/stategen/log.go — package stategen, prefix "state-gen"
- beacon-chain/sync/checkpoint/log.go — package checkpoint, prefix "checkpoint-sync"
- beacon-chain/sync/initial-sync/log.go — package initialsync, prefix "initial-sync"
- cmd/prysmctl/p2p/log.go — package p2p, prefix "prysmctl-p2p"
- config/features/log.go — package features, prefix "flags"
- io/file/log.go — package file, prefix "fileutil"
- proto/prysm/v1alpha1/log.go — package eth, prefix "protobuf"
- validator/client/beacon-api/log.go — package beacon_api, prefix "beacon-api"
- validator/db/kv/log.go — package kv, prefix "db"
- validator/db/filesystem/db.go — package filesystem, prefix "db"
- validator/keymanager/derived/log.go — package derived, prefix "derived-keymanager"
- validator/keymanager/local/log.go — package local, prefix "local-keymanager"
- validator/keymanager/remote-web3signer/log.go — package remote_web3signer, prefix "remote-keymanager"
- validator/keymanager/remote-web3signer/internal/log.go — package internal, prefix "remote-web3signer-internal"
- beacon-chain/forkchoice/doubly... — prefix "forkchoice-doublylinkedtree"
  
  
  
**List of excluded directories (their subdirectories are also
excluded):**
```bash
EXCLUDED_PATH_PREFIXES=(
    "testing"
    "validator/client/testutil"
    "beacon-chain/p2p/testing"
    "beacon-chain/rpc/eth/config"
    "beacon-chain/rpc/prysm/v1alpha1/debug"
    "tools"
    "runtime"
    "monitoring"
    "io"
    "cmd"
    ".well-known"
    "changelog"
    "hack"
    "specrefs"
    "third_party"
    "bazel-out"
    "bazel-bin"
    "bazel-prysm"
    "bazel-testlogs"
    "build"
    ".github"
    ".jj"
    ".idea"
    ".vscode"
)
```
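
To make the "subdirectories are also excluded" rule concrete, here is a small hypothetical helper showing the intended matching; the real logic lives in `hack/gen-logs.sh` and is not shown in this description.

```go
// Hypothetical sketch, not part of the PR: the actual exclusion is done in bash.
package main

import (
	"fmt"
	"strings"
)

// isExcluded reports whether a repo-relative package path matches one of the
// excluded prefixes, either exactly or as a subdirectory of it.
func isExcluded(pkgPath string, excluded []string) bool {
	for _, prefix := range excluded {
		if pkgPath == prefix || strings.HasPrefix(pkgPath, prefix+"/") {
			return true
		}
	}
	return false
}

func main() {
	excluded := []string{"testing", "io", "cmd"}
	fmt.Println(isExcluded("io/file", excluded)) // true: subdirectory of "io"
}
```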
---

`prysm/beacon-chain/db/pruner/pruner.go`:

package pruner

import (
	"context"
	"time"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/db"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/iface"
	"github.com/OffchainLabs/prysm/v7/config/params"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
	"github.com/OffchainLabs/prysm/v7/time/slots"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

const (
	// defaultPrunableBatchSize is the number of slots that can be pruned at once.
	defaultPrunableBatchSize = 32
	// defaultPruningWindow is the duration of one pruning window.
	defaultPruningWindow = time.Second * 3
	// defaultNumBatchesToPrune is the number of batches to prune in one pruning window.
	defaultNumBatchesToPrune = 15
)
// custodyUpdater is a tiny interface that the p2p service implements; kept here to avoid
// importing the p2p package and creating a cycle.
type custodyUpdater interface {
	UpdateEarliestAvailableSlot(earliestAvailableSlot primitives.Slot) error
}

// ServiceOption configures the pruner service.
type ServiceOption func(*Service)

// WithRetentionPeriod allows the user to specify a different data retention period than the spec default.
// The retention period is specified in epochs, and must be >= MIN_EPOCHS_FOR_BLOCK_REQUESTS.
func WithRetentionPeriod(retentionEpochs primitives.Epoch) ServiceOption {
	return func(s *Service) {
		defaultRetentionEpochs := primitives.Epoch(params.BeaconConfig().MinEpochsForBlockRequests) + 1
		if retentionEpochs < defaultRetentionEpochs {
			log.WithField("userEpochs", retentionEpochs).
				WithField("minRequired", defaultRetentionEpochs).
				Warn("Retention period too low, ignoring and using minimum required value")
			retentionEpochs = defaultRetentionEpochs
		}
		s.ps = pruneStartSlotFunc(retentionEpochs)
	}
}

// WithSlotTicker sets a custom slot ticker for the service.
func WithSlotTicker(slotTicker slots.Ticker) ServiceOption {
	return func(s *Service) {
		s.slotTicker = slotTicker
	}
}
// Service defines a service that prunes the beacon chain DB based on MIN_EPOCHS_FOR_BLOCK_REQUESTS.
type Service struct {
	ctx            context.Context
	db             db.Database
	ps             func(current primitives.Slot) primitives.Slot
	prunedUpto     primitives.Slot
	done           chan struct{}
	slotTicker     slots.Ticker
	backfillWaiter func() error
	initSyncWaiter func() error
	custody        custodyUpdater
}

// New creates a pruner service.
func New(ctx context.Context, db iface.Database, genesisTime time.Time, initSyncWaiter, backfillWaiter func() error, custody custodyUpdater, opts ...ServiceOption) (*Service, error) {
	if custody == nil {
		return nil, errors.New("custody updater is required for pruner but was not provided")
	}
	p := &Service{
		ctx:            ctx,
		db:             db,
		ps:             pruneStartSlotFunc(primitives.Epoch(params.BeaconConfig().MinEpochsForBlockRequests) + 1), // Default retention epochs is MIN_EPOCHS_FOR_BLOCK_REQUESTS + 1 from the current slot.
		done:           make(chan struct{}),
		slotTicker:     slots.NewSlotTicker(slots.UnsafeStartTime(genesisTime, 0), params.BeaconConfig().SecondsPerSlot),
		initSyncWaiter: initSyncWaiter,
		backfillWaiter: backfillWaiter,
		custody:        custody,
	}
	for _, o := range opts {
		o(p)
	}
	return p, nil
}
// Start starts the pruner service.
func (p *Service) Start() {
	log.Info("Starting Beacon DB pruner service")
	p.run()
}

// Stop stops the pruner service.
func (p *Service) Stop() error {
	log.Info("Stopping Beacon DB pruner service")
	close(p.done)
	return nil
}

// Status always returns nil.
func (p *Service) Status() error {
	return nil
}
// run waits for initial sync and backfill to complete, then prunes once per epoch at the mid-epoch slot.
func (p *Service) run() {
	if p.initSyncWaiter != nil {
		log.Info("Waiting for initial sync service to complete before starting pruner")
		if err := p.initSyncWaiter(); err != nil {
			log.WithError(err).Error("Failed to start database pruner, error waiting for initial sync completion")
			return
		}
	}
	if p.backfillWaiter != nil {
		log.Info("Waiting for backfill service to complete before starting pruner")
		if err := p.backfillWaiter(); err != nil {
			log.WithError(err).Error("Failed to start database pruner, error waiting for backfill completion")
			return
		}
	}

	defer p.slotTicker.Done()
	for {
		select {
		case <-p.ctx.Done():
			log.WithField("prunedUpto", p.prunedUpto).Debug("Stopping Beacon DB pruner service")
			return
		case <-p.done:
			log.WithField("prunedUpto", p.prunedUpto).Debug("Stopping Beacon DB pruner service")
			return
		case slot := <-p.slotTicker.C():
			// Prune at the middle of every epoch since we do a lot of things around epoch boundaries.
			if slots.SinceEpochStarts(slot) != (params.BeaconConfig().SlotsPerEpoch / 2) {
				continue
			}
			if err := p.prune(slot); err != nil {
				log.WithError(err).Error("Failed to prune database")
			}
		}
	}
}
// prune deletes historical chain data beyond the pruneSlot.
func (p *Service) prune(slot primitives.Slot) error {
	// Prune everything up to this slot (inclusive).
	pruneUpto := p.ps(slot)
	// Can't prune beyond genesis.
	if pruneUpto == 0 {
		return nil
	}
	// Skip if already pruned up to this slot.
	if pruneUpto <= p.prunedUpto {
		return nil
	}

	log.WithFields(logrus.Fields{
		"pruneUpto": pruneUpto,
	}).Debug("Pruning chain data")

	tt := time.Now()
	numBatches, err := p.pruneBatches(pruneUpto)
	if err != nil {
		return errors.Wrap(err, "failed to prune batches")
	}

	earliestAvailableSlot := pruneUpto + 1

	// Update pruning checkpoint.
	p.prunedUpto = pruneUpto

	// Update the earliest available slot after pruning.
	if err := p.updateEarliestAvailableSlot(earliestAvailableSlot); err != nil {
		return errors.Wrap(err, "update earliest available slot")
	}

	log.WithFields(logrus.Fields{
		"prunedUpto":            pruneUpto,
		"earliestAvailableSlot": earliestAvailableSlot,
		"duration":              time.Since(tt),
		"currentSlot":           slot,
		"batchSize":             defaultPrunableBatchSize,
		"numBatches":            numBatches,
	}).Debug("Successfully pruned chain data")

	return nil
}
// updateEarliestAvailableSlot updates the earliest available slot via the injected custody updater
// and also persists it to the database.
func (p *Service) updateEarliestAvailableSlot(earliestAvailableSlot primitives.Slot) error {
	if !params.FuluEnabled() {
		return nil
	}

	// Update the p2p in-memory state.
	if err := p.custody.UpdateEarliestAvailableSlot(earliestAvailableSlot); err != nil {
		return errors.Wrapf(err, "update earliest available slot after pruning to %d", earliestAvailableSlot)
	}

	// Persist to database to ensure it survives restarts.
	if err := p.db.UpdateEarliestAvailableSlot(p.ctx, earliestAvailableSlot); err != nil {
		return errors.Wrapf(err, "update earliest available slot in database for slot %d", earliestAvailableSlot)
	}

	return nil
}
// pruneBatches deletes historical data in batches of defaultPrunableBatchSize until there is
// nothing left to delete before pruneUpto or the pruning window expires.
func (p *Service) pruneBatches(pruneUpto primitives.Slot) (int, error) {
	ctx, cancel := context.WithTimeout(p.ctx, defaultPruningWindow)
	defer cancel()

	numBatches := 0
	for {
		select {
		case <-ctx.Done():
			return numBatches, nil
		default:
			for range defaultNumBatchesToPrune {
				slotsDeleted, err := p.db.DeleteHistoricalDataBeforeSlot(ctx, pruneUpto, defaultPrunableBatchSize)
				if err != nil {
					return 0, errors.Wrapf(err, "could not delete upto slot %d", pruneUpto)
				}
				// Return if there's nothing to delete.
				if slotsDeleted == 0 {
					return numBatches, nil
				}
				numBatches++
			}
		}
	}
}
// pruneStartSlotFunc returns a function that, given the current slot, determines the slot up to
// which data should be pruned based on the retention period.
func pruneStartSlotFunc(retentionEpochs primitives.Epoch) func(primitives.Slot) primitives.Slot {
	return func(current primitives.Slot) primitives.Slot {
		if retentionEpochs > slots.MaxSafeEpoch() {
			retentionEpochs = slots.MaxSafeEpoch()
		}
		offset := slots.UnsafeEpochStart(retentionEpochs)
		if offset >= current {
			return 0
		}
		return current - offset
	}
}