slasherkv: Set a 1 minute timeout on PruneAttestationOnEpoch operations (#15746)

* slasherkv: Set a 1 minute timeout on PruneAttestationOnEpoch operations to prevent very large bolt transactions.

* Fix CI

---------

Co-authored-by: Manu NALEPA <enalepa@offchainlabs.com>
This commit is contained in:
Preston Van Loon
2025-09-25 10:46:28 -05:00
committed by GitHub
parent 83d75bcb78
commit fe9dd255c7
2 changed files with 30 additions and 5 deletions

View File

@@ -4,17 +4,26 @@ import (
"bytes"
"context"
"encoding/binary"
"time"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v6/time/slots"
"github.com/pkg/errors"
bolt "go.etcd.io/bbolt"
)
var errTimeOut = errors.New("operation timed out")
// PruneAttestationsAtEpoch deletes all attestations from the slasher DB with target epoch
// less than or equal to the specified epoch.
func (s *Store) PruneAttestationsAtEpoch(
_ context.Context, maxEpoch primitives.Epoch,
ctx context.Context, maxEpoch primitives.Epoch,
) (numPruned uint, err error) {
// In some cases, pruning may take a very long time and consume significant memory in the
// open Update transaction. Therefore, we impose a 1 minute timeout on this operation.
ctx, cancel := context.WithTimeout(ctx, 1*time.Minute)
defer cancel()
// We can prune everything less than the current epoch - history length.
encodedEndPruneEpoch := make([]byte, 8)
binary.BigEndian.PutUint64(encodedEndPruneEpoch, uint64(maxEpoch))
@@ -48,13 +57,18 @@ func (s *Store) PruneAttestationsAtEpoch(
return
}
if err = s.db.Update(func(tx *bolt.Tx) error {
err = s.db.Update(func(tx *bolt.Tx) error {
signingRootsBkt := tx.Bucket(attestationDataRootsBucket)
attRecordsBkt := tx.Bucket(attestationRecordsBucket)
c := signingRootsBkt.Cursor()
// We begin a pruning iteration starting from the first item in the bucket.
for k, v := c.First(); k != nil; k, v = c.Next() {
if ctx.Err() != nil {
// Exit the routine if the context has expired.
return errTimeOut
}
// We check the epoch from the current key in the database.
// If we have hit an epoch that is greater than the end epoch of the pruning process,
// we then completely exit the process as we are done.
@@ -67,18 +81,27 @@ func (s *Store) PruneAttestationsAtEpoch(
// so it is possible we have a few adjacent objects that have the same slot, such as
// (target_epoch = 3 ++ _) => encode(attestation)
if err := signingRootsBkt.Delete(k); err != nil {
return err
return errors.Wrap(err, "delete attestation signing root")
}
if err := attRecordsBkt.Delete(v); err != nil {
return err
return errors.Wrap(err, "delete attestation record")
}
slasherAttestationsPrunedTotal.Inc()
numPruned++
}
return nil
}); err != nil {
})
if errors.Is(err, errTimeOut) {
log.Warning("Aborting pruning routine")
return
}
if err != nil {
log.WithError(err).Error("Failed to prune attestations")
return
}
return
}

View File

@@ -0,0 +1,2 @@
### Added
- Added a 1 minute timeout on PruneAttestationOnEpoch operations to prevent very large bolt transactions