Files
prysm/beacon-chain/db/slasherkv/pruning.go
Manu NALEPA 8070fc8ece Fix slasher disk usage leak. (#14151)
* `PruneProposalsAtEpoch`: Test return value in case of nothing to prune.

* `TestStore_PruneAttestations_OK`: Create unique validator indexes.

Before this commit, `attester2` for `j = n` was the same as
`attester1` for `j = n + 1`, resulting in the erasure of a lot of attesters.

I guess it was not the initial intent.

* Slasher pruning: Check if the number of pruned items corresponds to the expectation.

Before this commit, if the pruning function did remove a superset
of the expected pruned items (including all the items), then the test would pass.

* Prune items that should be pruned and stop pruning items that should not be pruned.

The first 8 bytes of the keys of `attestation-data-roots` and
`proposal-records` correspond respectively to an encoded epoch and
an encoded slot.

The important word in this sentence is "encoded".
Before this commit, these slots/epochs were SSZ encoded, which means that
they were little-endian encoded.

However:
- `uint64PrefixGreaterThan` uses `bytes.Compare` which expects
   big-endian encoded values.
- `for k, _ := c.First(); k != nil; k, _ = c.Next()` iterates over the
  keys in big-endian order.

The consequence is:
- Some items that should be pruned are not pruned, provoking a disk
  usage leak.
- Some items that should not be pruned are pruned, provoking errors like
  in https://github.com/prysmaticlabs/prysm/issues/13658.

This commit encodes the slot/epoch as big-endian before storing them in
the database keys.

Why was this bug not detected by unit tests before?

The values used in unit tests before this commit in
`TestStore_PruneProposalsAtEpoch` and `TestStore_PruneAttestations_OK`
are `10` and `20`.
Unfortunately, checking if `littleEndian(20) > littleEndian(10)`
with the `>` operator considering operands as big-endian encoded returns
the expected result...

Just replacing `20` by `30` triggers the bug.

* Make deepsource happy.

* Slasher: Migrate database from little-endian to big-endian.

* Update beacon-chain/slasher/service.go

Co-authored-by: Preston Van Loon <pvanloon@offchainlabs.com>

* Update beacon-chain/db/slasherkv/migrate.go

Co-authored-by: Preston Van Loon <pvanloon@offchainlabs.com>

* `TestMigrate`: Fix documentation.

---------

Co-authored-by: Preston Van Loon <pvanloon@offchainlabs.com>
2024-07-03 13:33:07 +00:00

168 lines
4.9 KiB
Go

package slasherkv
import (
"bytes"
"context"
"encoding/binary"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/time/slots"
bolt "go.etcd.io/bbolt"
)
// PruneAttestationsAtEpoch deletes all attestations from the slasher DB with target epoch
// less than or equal to the specified epoch.
//
// It returns the number of entries removed from the attestation data roots bucket.
// The returned count is only meaningful when err is nil.
//
// The pruning loop checks ctx on every iteration, so a cancelled context aborts
// the pruning and the context error is returned to the caller.
func (s *Store) PruneAttestationsAtEpoch(
	ctx context.Context, maxEpoch primitives.Epoch,
) (numPruned uint, err error) {
	// Keys of the attestation data roots bucket start with the big-endian encoded
	// target epoch, so the pruning bound is encoded the same way to allow a
	// byte-wise comparison against key prefixes.
	encodedEndPruneEpoch := make([]byte, 8)
	binary.BigEndian.PutUint64(encodedEndPruneEpoch, uint64(maxEpoch))

	// We retrieve the lowest stored epoch in the attestations bucket.
	var lowestEpoch primitives.Epoch
	var hasData bool
	if err = s.db.View(func(tx *bolt.Tx) error {
		bkt := tx.Bucket(attestationDataRootsBucket)
		c := bkt.Cursor()
		k, _ := c.First()
		if k == nil {
			return nil
		}
		hasData = true
		lowestEpoch = primitives.Epoch(binary.BigEndian.Uint64(k))
		return nil
	}); err != nil {
		return numPruned, err
	}

	// If there is no data stored, just exit early.
	if !hasData {
		return numPruned, nil
	}

	// If the lowest epoch is greater than the end pruning epoch,
	// there is nothing to prune, so we return early.
	if lowestEpoch > maxEpoch {
		log.Debugf("Lowest epoch %d is > pruning epoch %d, nothing to prune", lowestEpoch, maxEpoch)
		return numPruned, nil
	}

	err = s.db.Update(func(tx *bolt.Tx) error {
		signingRootsBkt := tx.Bucket(attestationDataRootsBucket)
		attRecordsBkt := tx.Bucket(attestationRecordsBucket)
		c := signingRootsBkt.Cursor()

		// We begin a pruning iteration starting from the first item in the bucket.
		for k, v := c.First(); k != nil; k, v = c.Next() {
			// Stop as soon as the caller cancels, mirroring PruneProposalsAtEpoch.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}

			// We check the epoch from the current key in the database.
			// If we have hit an epoch that is greater than the end epoch of the pruning process,
			// we then completely exit the process as we are done.
			if uint64PrefixGreaterThan(k, encodedEndPruneEpoch) {
				return nil
			}

			// Attestations in the database look like this:
			//  (target_epoch ++ _) => encode(attestation)
			// so it is possible we have a few adjacent objects that have the same target epoch, such as
			//  (target_epoch = 3 ++ _) => encode(attestation)
			if err := signingRootsBkt.Delete(k); err != nil {
				return err
			}

			// The value stored under the data roots key is used as the key of the
			// matching entry in the attestation records bucket.
			if err := attRecordsBkt.Delete(v); err != nil {
				return err
			}

			slasherAttestationsPrunedTotal.Inc()
			numPruned++
		}
		return nil
	})

	return numPruned, err
}
// PruneProposalsAtEpoch removes from the slasher DB every proposal whose epoch is
// less than or equal to maxEpoch, and reports how many records were removed.
//
// The epoch bound is converted to a slot bound through slots.EpochEnd, because
// proposal keys start with a big-endian encoded slot. The context is checked on
// every iteration of the pruning loop.
func (s *Store) PruneProposalsAtEpoch(
	ctx context.Context, maxEpoch primitives.Epoch,
) (numPruned uint, err error) {
	var endPruneSlot primitives.Slot
	if endPruneSlot, err = slots.EpochEnd(maxEpoch); err != nil {
		return numPruned, err
	}

	// Big-endian encoding keeps byte-wise ordering aligned with numeric ordering.
	var boundBuf [8]byte
	binary.BigEndian.PutUint64(boundBuf[:], uint64(endPruneSlot))
	encodedEndPruneSlot := boundBuf[:]

	// First pass (read-only): find out whether the bucket holds anything and,
	// if so, which slot is the lowest one stored.
	var (
		lowestSlot primitives.Slot
		hasData    bool
	)
	err = s.db.View(func(tx *bolt.Tx) error {
		firstKey, _ := tx.Bucket(proposalRecordsBucket).Cursor().First()
		if firstKey != nil {
			hasData = true
			lowestSlot = slotFromProposalKey(firstKey)
		}
		return nil
	})
	if err != nil {
		return numPruned, err
	}

	// An empty bucket means there is nothing to do.
	if !hasData {
		return numPruned, nil
	}

	// Everything stored is already newer than the pruning bound.
	if lowestSlot > endPruneSlot {
		log.Debugf("Lowest slot %d is > pruning slot %d, nothing to prune", lowestSlot, endPruneSlot)
		return numPruned, nil
	}

	// Second pass (read-write): walk the bucket from its first key and delete
	// records until a key beyond the pruning bound is reached.
	err = s.db.Update(func(tx *bolt.Tx) error {
		bkt := tx.Bucket(proposalRecordsBucket)
		cursor := bkt.Cursor()

		key, _ := cursor.First()
		for key != nil {
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}

			// Keys look like (slot ++ validatorIndex), so several consecutive keys
			// may share the same slot, e.g.
			//  (slot = 3 ++ validatorIndex = 0) => ...
			//  (slot = 3 ++ validatorIndex = 1) => ...
			// Once a slot past the bound shows up, we are done.
			if uint64PrefixGreaterThan(key, encodedEndPruneSlot) {
				break
			}

			if delErr := bkt.Delete(key); delErr != nil {
				return delErr
			}

			slasherProposalsPrunedTotal.Inc()
			numPruned++

			key, _ = cursor.Next()
		}
		return nil
	})

	return numPruned, err
}
// slotFromProposalKey decodes the slot stored in the first 8 bytes of a proposal
// record key, interpreting them as a big-endian uint64.
// The key must hold at least 8 bytes.
func slotFromProposalKey(key []byte) primitives.Slot {
	rawSlot := binary.BigEndian.Uint64(key[:8])
	return primitives.Slot(rawSlot)
}
// uint64PrefixGreaterThan reports whether the first 8 bytes of key sort strictly
// after lessThan in lexicographic byte order. When both hold big-endian encoded
// uint64 values, this is the same as a numeric "greater than" comparison.
// The key must hold at least 8 bytes.
func uint64PrefixGreaterThan(key, lessThan []byte) bool {
	prefix := key[:8]
	// bytes.Compare is antisymmetric, so "bound < prefix" is "prefix > bound".
	return bytes.Compare(lessThan, prefix) < 0
}