Compare commits

...

30 Commits

Author SHA1 Message Date
Kasey Kirkham
8d6b3a9fb2 Limit columns requested instead of making block batches small 2025-11-20 18:15:52 -06:00
Kasey Kirkham
1438a351d7 remove slice arg from NotBusy filter 2025-11-20 13:32:18 -06:00
Kasey Kirkham
0f47577662 Test coverage for log.go and some small improvements
Co-authored-by: Claude <noreply@anthropic.com>
2025-11-20 13:32:18 -06:00
Preston Van Loon
345d587204 Add comprehensive mutation test coverage for columns.go
This commit adds 25 new test cases to improve mutation testing coverage for
the backfill columns sync functionality, addressing 38 previously escaped
mutants.

Test coverage added:
- buildColumnBatch(): Array indexing edge cases, fork epoch boundaries,
  pre/post-Fulu block handling, and control flow mutations
- countedValidation(): Validation error paths, commitment mismatches,
  and state update verification (Unset/addPeerColumns calls)
- validate(): Metrics recording wrapper and error propagation
- newColumnSync(): Initialization paths and nil columnBatch handling
- currentCustodiedColumns(): Column indices retrieval
- columnSync wrapper methods: Nil checks and delegation logic

The new tests specifically target:
- Array indexing bugs (len-1, len+1, len-0 mutations)
- Boundary conditions at Fulu fork epoch (< vs <=)
- Branch coverage for error handling paths
- Statement removal detection for critical state updates
- Expression and comparison operator mutations

All 25 new test cases pass successfully, bringing function coverage from
14% (1/7 functions) to 100% (7/7 functions) and estimated mutation
coverage from ~0% to ~95%+.
2025-11-20 08:54:07 -06:00
Preston Van Loon
013d8ca4fd TestColumnBatch 2025-11-20 08:54:07 -06:00
Kasey Kirkham
85b1414119 Test coverage for log.go and some small improvements
Co-authored-by: Claude <noreply@anthropic.com>
2025-11-20 00:21:59 -06:00
Kasey Kirkham
b7e999d651 Test coverage for verify_column.go
Co-authored-by: Claude <noreply@anthropic.com>
2025-11-19 16:17:56 -06:00
Kasey Kirkham
d6209ae5c3 rm extra tick 2025-11-19 13:18:04 -06:00
Kasey Kirkham
36e8947068 more comment fixes 2025-11-19 13:01:33 -06:00
Kasey Kirkham
0a827f17d5 more naming etc feedback 2025-11-19 12:24:07 -06:00
Kasey Kirkham
8d126196d9 update changelog to include message about flag default changing 2025-11-19 11:42:02 -06:00
Kasey Kirkham
094cee25ac more comment cleanup 2025-11-19 11:41:01 -06:00
Kasey Kirkham
bbd856fe6f extra BisectionIterator as a separate interface 2025-11-19 11:20:22 -06:00
Kasey Kirkham
b9a7cb3764 more manu feedback 2025-11-19 10:40:49 -06:00
Kasey Kirkham
61d4a6c105 don't try to cache peerdas.Info 2025-11-18 17:44:38 -06:00
Kasey Kirkham
1037e56238 manu feedback 2025-11-18 17:39:20 -06:00
Kasey Kirkham
ac0b3cb593 remove "feature" to slice result from BestFinalized 2025-11-18 16:22:31 -06:00
Kasey Kirkham
d156168712 Avoid requesting blocks from peer that gave us an invalid batch 2025-11-18 10:48:52 -06:00
Kasey Kirkham
1644dc6323 decrease default batch size to compensate for data column overhead 2025-11-13 13:14:21 -06:00
Kasey Kirkham
29257b10ec avoid debug log spam that comes from computing custody info pre-fulu 2025-11-12 16:35:40 -06:00
Kasey Kirkham
6849302288 re-enable backfill for fulu 2025-11-12 16:35:40 -06:00
Kasey Kirkham
58f6b3ff3c fixing rebase 2025-11-12 16:35:37 -06:00
Kasey Kirkham
51bca0d08c make daChecker less ambiguously stateful 2025-11-12 16:00:39 -06:00
Kasey Kirkham
3697b1db50 multiStore/Checker to validate dependencies/state up front 2025-11-12 16:00:39 -06:00
Kasey Kirkham
c1b361ce0c remove or rewrite non-actionable TODOs 2025-11-12 16:00:39 -06:00
Kasey Kirkham
20bbc60efe replace panic that "shouldn't happen" with a safe shutdown 2025-11-12 16:00:39 -06:00
Kasey Kirkham
0f9b87cb59 downscore peers on block batch failures 2025-11-12 16:00:39 -06:00
Kasey Kirkham
d6ce7e0b9f potuz' feedback 2025-11-12 16:00:39 -06:00
Kasey Kirkham
9d8f45940a filter locally available columns from backfill batch 2025-11-12 16:00:39 -06:00
Kasey
4424cce30d DataColumnSidecar backfill 2025-11-12 16:00:39 -06:00
56 changed files with 4937 additions and 694 deletions

View File

@@ -134,7 +134,7 @@ func getStateVersionAndPayload(st state.BeaconState) (int, interfaces.ExecutionD
return preStateVersion, preStateHeader, nil
}
func (s *Service) onBlockBatch(ctx context.Context, blks []consensusblocks.ROBlock, avs das.AvailabilityStore) error {
func (s *Service) onBlockBatch(ctx context.Context, blks []consensusblocks.ROBlock, avs das.AvailabilityChecker) error {
ctx, span := trace.StartSpan(ctx, "blockChain.onBlockBatch")
defer span.End()
@@ -306,7 +306,7 @@ func (s *Service) onBlockBatch(ctx context.Context, blks []consensusblocks.ROBlo
return s.saveHeadNoDB(ctx, lastB, lastBR, preState, !isValidPayload)
}
func (s *Service) areSidecarsAvailable(ctx context.Context, avs das.AvailabilityStore, roBlock consensusblocks.ROBlock) error {
func (s *Service) areSidecarsAvailable(ctx context.Context, avs das.AvailabilityChecker, roBlock consensusblocks.ROBlock) error {
blockVersion := roBlock.Version()
block := roBlock.Block()
slot := block.Slot()

View File

@@ -39,8 +39,8 @@ var epochsSinceFinalityExpandCache = primitives.Epoch(4)
// BlockReceiver interface defines the methods of chain service for receiving and processing new blocks.
type BlockReceiver interface {
ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityStore) error
ReceiveBlockBatch(ctx context.Context, blocks []blocks.ROBlock, avs das.AvailabilityStore) error
ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityChecker) error
ReceiveBlockBatch(ctx context.Context, blocks []blocks.ROBlock, avs das.AvailabilityChecker) error
HasBlock(ctx context.Context, root [32]byte) bool
RecentBlockSlot(root [32]byte) (primitives.Slot, error)
BlockBeingSynced([32]byte) bool
@@ -69,7 +69,7 @@ type SlashingReceiver interface {
// 1. Validate block, apply state transition and update checkpoints
// 2. Apply fork choice to the processed block
// 3. Save latest head info
func (s *Service) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityStore) error {
func (s *Service) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityChecker) error {
ctx, span := trace.StartSpan(ctx, "blockChain.ReceiveBlock")
defer span.End()
// Return early if the block is blacklisted
@@ -242,7 +242,7 @@ func (s *Service) validateExecutionAndConsensus(
return postState, isValidPayload, nil
}
func (s *Service) handleDA(ctx context.Context, avs das.AvailabilityStore, block blocks.ROBlock) (time.Duration, error) {
func (s *Service) handleDA(ctx context.Context, avs das.AvailabilityChecker, block blocks.ROBlock) (time.Duration, error) {
var err error
start := time.Now()
if avs != nil {
@@ -332,7 +332,7 @@ func (s *Service) executePostFinalizationTasks(ctx context.Context, finalizedSta
// ReceiveBlockBatch processes the whole block batch at once, assuming the block batch is linear, transitioning
// the state, performing batch verification of all collected signatures and then performing the appropriate
// actions for a block post-transition.
func (s *Service) ReceiveBlockBatch(ctx context.Context, blocks []blocks.ROBlock, avs das.AvailabilityStore) error {
func (s *Service) ReceiveBlockBatch(ctx context.Context, blocks []blocks.ROBlock, avs das.AvailabilityChecker) error {
ctx, span := trace.StartSpan(ctx, "blockChain.ReceiveBlockBatch")
defer span.End()

View File

@@ -275,7 +275,7 @@ func (s *ChainService) ReceiveBlockInitialSync(ctx context.Context, block interf
}
// ReceiveBlockBatch processes blocks in batches from initial-sync.
func (s *ChainService) ReceiveBlockBatch(ctx context.Context, blks []blocks.ROBlock, _ das.AvailabilityStore) error {
func (s *ChainService) ReceiveBlockBatch(ctx context.Context, blks []blocks.ROBlock, _ das.AvailabilityChecker) error {
if s.State == nil {
return ErrNilState
}
@@ -305,7 +305,7 @@ func (s *ChainService) ReceiveBlockBatch(ctx context.Context, blks []blocks.ROBl
}
// ReceiveBlock mocks ReceiveBlock method in chain service.
func (s *ChainService) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, _ [32]byte, _ das.AvailabilityStore) error {
func (s *ChainService) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, _ [32]byte, _ das.AvailabilityChecker) error {
if s.ReceiveBlockMockErr != nil {
return s.ReceiveBlockMockErr
}

View File

@@ -107,3 +107,104 @@ func computeInfoCacheKey(nodeID enode.ID, custodyGroupCount uint64) [nodeInfoCac
return key
}
// ColumnIndices represents a set of column indices. This could be the set of indices that a node is required to custody,
// the set that a peer custodies, missing indices for a given block, indices that are present on disk, etc.
type ColumnIndices map[uint64]struct{}
// Has returns true if the index is present in the ColumnIndices.
func (ci ColumnIndices) Has(index uint64) bool {
_, ok := ci[index]
return ok
}
// Count returns the number of indices present in the ColumnIndices.
func (ci ColumnIndices) Count() int {
return len(ci)
}
// Set sets the index in the ColumnIndices.
func (ci ColumnIndices) Set(index uint64) {
ci[index] = struct{}{}
}
// Unset removes the index from the ColumnIndices.
func (ci ColumnIndices) Unset(index uint64) {
delete(ci, index)
}
// Copy creates a copy of the ColumnIndices.
func (ci ColumnIndices) Copy() ColumnIndices {
newCi := make(ColumnIndices, len(ci))
for index, set := range ci {
newCi[index] = set
}
return newCi
}
// Intersection returns a new ColumnIndices that contains only the indices that are present in both ColumnIndices.
func (ci ColumnIndices) Intersection(other ColumnIndices) ColumnIndices {
result := make(ColumnIndices)
for index := range ci {
if other.Has(index) {
result.Set(index)
}
}
return result
}
// Union mutates the receiver so that any index that is set in either of
// the two ColumnIndices is set in the receiver after the function finishes.
// It does not mutate the other ColumnIndices given as a function argument.
func (ci ColumnIndices) Union(other ColumnIndices) {
for index := range other {
ci.Set(index)
}
}
// ToMap converts a ColumnIndices into a map[uint64]struct{}.
// In the future ColumnIndices may be changed to a bit map, so using
// ToMap will ensure forwards-compatibility.
func (ci ColumnIndices) ToMap() map[uint64]struct{} {
return ci.Copy()
}
// ToSlice converts a ColumnIndices into a slice of uint64 indices.
func (ci ColumnIndices) ToSlice() []uint64 {
indices := make([]uint64, 0, len(ci))
for index := range ci {
indices = append(indices, index)
}
return indices
}
// NewColumnIndicesFromSlice creates a ColumnIndices from a slice of uint64.
func NewColumnIndicesFromSlice(indices []uint64) ColumnIndices {
ci := make(ColumnIndices, len(indices))
for _, index := range indices {
ci[index] = struct{}{}
}
return ci
}
// NewColumnIndicesFromMap creates a ColumnIndices from a map[uint64]bool. This kind of map
// is used in several places in peerdas code. Converting from this map type to ColumnIndices
// will allow us to move ColumnIndices underlying type to a bitmap in the future and avoid
// lots of loops for things like intersections/unions or copies.
func NewColumnIndicesFromMap(indices map[uint64]bool) ColumnIndices {
ci := make(ColumnIndices, len(indices))
for index, set := range indices {
if !set {
continue
}
ci[index] = struct{}{}
}
return ci
}
// NewColumnIndices creates an empty ColumnIndices.
// In the future ColumnIndices may change from a reference type to a value type,
// so using this constructor will ensure forwards-compatibility.
func NewColumnIndices() ColumnIndices {
return make(ColumnIndices)
}
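
The helpers above are small, but the mutate-vs-copy semantics (Union mutates its receiver, while Copy and Intersection return new sets) are worth seeing in one place. A minimal sketch, assuming only the peerdas API added in this file:

package main

import (
	"fmt"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
)

func main() {
	custody := peerdas.NewColumnIndicesFromSlice([]uint64{1, 5, 9})
	stored := peerdas.NewColumnIndicesFromSlice([]uint64{5})

	// Copy before mutating: ColumnIndices is currently a reference (map) type.
	missing := custody.Copy()
	for idx := range stored.ToMap() { // ToMap keeps this loop forwards-compatible with a future bitmap
		missing.Unset(idx)
	}
	fmt.Println(missing.ToSlice()) // [1 9] in some order; map iteration order is not defined

	// Union mutates the receiver; Intersection returns a new set.
	custody.Union(peerdas.NewColumnIndicesFromSlice([]uint64{12}))
	fmt.Println(custody.Count(), custody.Intersection(stored).Count()) // 4 1
}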

View File

@@ -25,3 +25,10 @@ func TestInfo(t *testing.T) {
require.DeepEqual(t, expectedDataColumnsSubnets, actual.DataColumnsSubnets)
}
}
func TestNewColumnIndicesFromMap(t *testing.T) {
t.Run("nil map", func(t *testing.T) {
ci := peerdas.NewColumnIndicesFromMap(nil)
require.Equal(t, 0, ci.Count())
})
}

View File

@@ -4,14 +4,18 @@ go_library(
name = "go_default_library",
srcs = [
"availability_blobs.go",
"availability_columns.go",
"bisect.go",
"blob_cache.go",
"data_column_cache.go",
"iface.go",
"log.go",
"mock.go",
],
importpath = "github.com/OffchainLabs/prysm/v7/beacon-chain/das",
visibility = ["//visibility:public"],
deps = [
"//beacon-chain/core/peerdas:go_default_library",
"//beacon-chain/db/filesystem:go_default_library",
"//beacon-chain/verification:go_default_library",
"//config/fieldparams:go_default_library",
@@ -21,6 +25,7 @@ go_library(
"//runtime/logging:go_default_library",
"//runtime/version:go_default_library",
"//time/slots:go_default_library",
"@com_github_ethereum_go_ethereum//p2p/enode:go_default_library",
"@com_github_pkg_errors//:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library",
],
@@ -30,11 +35,13 @@ go_test(
name = "go_default_test",
srcs = [
"availability_blobs_test.go",
"availability_columns_test.go",
"blob_cache_test.go",
"data_column_cache_test.go",
],
embed = [":go_default_library"],
deps = [
"//beacon-chain/core/peerdas:go_default_library",
"//beacon-chain/db/filesystem:go_default_library",
"//beacon-chain/verification:go_default_library",
"//config/fieldparams:go_default_library",
@@ -45,6 +52,7 @@ go_test(
"//testing/require:go_default_library",
"//testing/util:go_default_library",
"//time/slots:go_default_library",
"@com_github_ethereum_go_ethereum//p2p/enode:go_default_library",
"@com_github_pkg_errors//:go_default_library",
],
)

View File

@@ -13,7 +13,7 @@ import (
"github.com/OffchainLabs/prysm/v7/runtime/version"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/sirupsen/logrus"
)
var (
@@ -29,7 +29,7 @@ type LazilyPersistentStoreBlob struct {
verifier BlobBatchVerifier
}
var _ AvailabilityStore = &LazilyPersistentStoreBlob{}
var _ AvailabilityChecker = &LazilyPersistentStoreBlob{}
// BlobBatchVerifier enables LazyAvailabilityStore to manage the verification process
// going from ROBlob->VerifiedROBlob, while avoiding the decision of which individual verifications
@@ -81,7 +81,16 @@ func (s *LazilyPersistentStoreBlob) Persist(current primitives.Slot, sidecars ..
// IsDataAvailable returns nil if all the commitments in the given block are persisted to the db and have been verified.
// BlobSidecars already in the db are assumed to have been previously verified against the block.
func (s *LazilyPersistentStoreBlob) IsDataAvailable(ctx context.Context, current primitives.Slot, b blocks.ROBlock) error {
func (s *LazilyPersistentStoreBlob) IsDataAvailable(ctx context.Context, current primitives.Slot, blks ...blocks.ROBlock) error {
for _, b := range blks {
if err := s.checkOne(ctx, current, b); err != nil {
return err
}
}
return nil
}
func (s *LazilyPersistentStoreBlob) checkOne(ctx context.Context, current primitives.Slot, b blocks.ROBlock) error {
blockCommitments, err := commitmentsToCheck(b, current)
if err != nil {
return errors.Wrapf(err, "could not check data availability for block %#x", b.Root())
@@ -112,7 +121,7 @@ func (s *LazilyPersistentStoreBlob) IsDataAvailable(ctx context.Context, current
ok := errors.As(err, &me)
if ok {
fails := me.Failures()
lf := make(log.Fields, len(fails))
lf := make(logrus.Fields, len(fails))
for i := range fails {
lf[fmt.Sprintf("fail_%d", i)] = fails[i].Error()
}

View File

@@ -170,7 +170,7 @@ func TestLazyPersistOnceCommitted(t *testing.T) {
// stashes as expected
require.NoError(t, as.Persist(ds, blobSidecars...))
// ignores duplicates
require.ErrorIs(t, as.Persist(ds, blobSidecars...), ErrDuplicateSidecar)
require.ErrorIs(t, as.Persist(ds, blobSidecars...), errDuplicateSidecar)
// ignores index out of bound
blobSidecars[0].Index = 6

View File

@@ -0,0 +1,245 @@
package das
import (
"context"
"io"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/ethereum/go-ethereum/p2p/enode"
errors "github.com/pkg/errors"
)
// LazilyPersistentStoreColumn is an implementation of AvailabilityStore to be used when batch syncing data columns.
// This implementation will hold any data columns passed to Persist until IsDataAvailable is called for their
// block, at which time they will undergo full verification and be saved to disk.
type LazilyPersistentStoreColumn struct {
store *filesystem.DataColumnStorage
cache *dataColumnCache
newDataColumnsVerifier verification.NewDataColumnsVerifier
custody *custodyRequirement
bisector Bisector
}
var _ AvailabilityChecker = &LazilyPersistentStoreColumn{}
// DataColumnsVerifier enables LazilyPersistentStoreColumn to manage the verification process
// going from RODataColumn->VerifiedRODataColumn, while avoiding the decision of which individual verifications
// to run and in what order. Since LazilyPersistentStoreColumn always tries to verify and save data columns only when
// they are all available, the interface takes a slice of data column sidecars.
type DataColumnsVerifier interface {
VerifiedRODataColumns(ctx context.Context, blk blocks.ROBlock, scs []blocks.RODataColumn) ([]blocks.VerifiedRODataColumn, error)
}
// NewLazilyPersistentStoreColumn creates a new LazilyPersistentStoreColumn.
// WARNING: The resulting LazilyPersistentStoreColumn is NOT thread-safe.
func NewLazilyPersistentStoreColumn(
store *filesystem.DataColumnStorage,
newDataColumnsVerifier verification.NewDataColumnsVerifier,
nodeID enode.ID,
cgc uint64,
bisector Bisector,
) *LazilyPersistentStoreColumn {
return &LazilyPersistentStoreColumn{
store: store,
cache: newDataColumnCache(),
newDataColumnsVerifier: newDataColumnsVerifier,
custody: &custodyRequirement{nodeID: nodeID, cgc: cgc},
bisector: bisector,
}
}
// Persist adds columns to the working column cache. Columns stored in this cache will be persisted
// for at least as long as the node is running. Once IsDataAvailable succeeds, all columns referenced
// by the given block are guaranteed to be persisted for the remainder of the retention period.
func (s *LazilyPersistentStoreColumn) Persist(current primitives.Slot, sidecars ...blocks.RODataColumn) error {
currentEpoch := slots.ToEpoch(current)
for _, sidecar := range sidecars {
if !params.WithinDAPeriod(slots.ToEpoch(sidecar.Slot()), currentEpoch) {
continue
}
if err := s.cache.stash(sidecar); err != nil {
return errors.Wrap(err, "stash DataColumnSidecar")
}
}
return nil
}
// IsDataAvailable returns nil if all the commitments in the given block are persisted to the db and have been verified.
// DataColumnSidecars already in the db are assumed to have been previously verified against the block.
func (s *LazilyPersistentStoreColumn) IsDataAvailable(ctx context.Context, current primitives.Slot, blks ...blocks.ROBlock) error {
currentEpoch := slots.ToEpoch(current)
toVerify := make([]blocks.RODataColumn, 0)
for _, block := range blks {
indices, err := s.required(block, currentEpoch)
if err != nil {
return errors.Wrapf(err, "full commitments to check with block root `%#x` and current slot `%d`", block.Root(), current)
}
if indices.Count() == 0 {
continue
}
key := keyFromBlock(block)
entry := s.cache.entry(key)
toVerify, err = entry.append(toVerify, IndicesNotStored(s.store.Summary(block.Root()), indices))
if err != nil {
return errors.Wrap(err, "entry filter")
}
}
if err := s.verifyAndSave(toVerify); err != nil {
log.Warn("Batch verification failed, bisecting columns by peer")
if err := s.bisectVerification(toVerify); err != nil {
return errors.Wrap(err, "bisect verification")
}
}
s.cache.cleanup(blks)
return nil
}
// required returns the set of column indices to check for a given block.
func (s *LazilyPersistentStoreColumn) required(block blocks.ROBlock, current primitives.Epoch) (peerdas.ColumnIndices, error) {
eBlk := slots.ToEpoch(block.Block().Slot())
eFulu := params.BeaconConfig().FuluForkEpoch
if current < eFulu || eBlk < eFulu || !params.WithinDAPeriod(eBlk, current) {
return peerdas.NewColumnIndices(), nil
}
// If there are any commitments in the block, there are blobs,
// and if there are blobs, we need the columns derived from those blobs.
commitments, err := block.Block().Body().BlobKzgCommitments()
if err != nil {
return nil, errors.Wrap(err, "blob KZG commitments")
}
// No DA check needed if the block has no blobs.
if len(commitments) == 0 {
return peerdas.NewColumnIndices(), nil
}
return s.custody.required(current)
}
// verifyAndSave calls Save on the column store if the columns pass verification.
func (s *LazilyPersistentStoreColumn) verifyAndSave(columns []blocks.RODataColumn) error {
verified, err := s.verifyColumns(columns)
if err != nil {
return errors.Wrap(err, "verify columns")
}
if err := s.store.Save(verified); err != nil {
return errors.Wrap(err, "save data column sidecars")
}
return nil
}
func (s *LazilyPersistentStoreColumn) verifyColumns(columns []blocks.RODataColumn) ([]blocks.VerifiedRODataColumn, error) {
verifier := s.newDataColumnsVerifier(columns, verification.ByRangeRequestDataColumnSidecarRequirements)
if err := verifier.ValidFields(); err != nil {
return nil, errors.Wrap(err, "valid fields")
}
if err := verifier.SidecarInclusionProven(); err != nil {
return nil, errors.Wrap(err, "sidecar inclusion proven")
}
if err := verifier.SidecarKzgProofVerified(); err != nil {
return nil, errors.Wrap(err, "sidecar KZG proof verified")
}
return verifier.VerifiedRODataColumns()
}
// bisectVerification is used when verification of a batch of columns fails. Since the batch could
// span multiple blocks or have been fetched from multiple peers, this pattern enables code using the
// store to break the verification into smaller units and learn the results, so that it can plan to retry
// retrieval of the unusable columns.
func (s *LazilyPersistentStoreColumn) bisectVerification(columns []blocks.RODataColumn) error {
if len(columns) == 0 {
return nil
}
if s.bisector == nil {
return errors.New("bisector not initialized")
}
iter, err := s.bisector.Bisect(columns)
if err != nil {
return errors.Wrap(err, "Bisector.Bisect")
}
// It's up to the bisector how to chunk up columns for verification,
// which could be by block, or by peer, or any other strategy.
// For the purposes of range syncing or backfill this will be by peer,
// so that the node can learn which peer is giving us bad data and downscore them.
for columns, err := iter.Next(); columns != nil; columns, err = iter.Next() {
if err != nil {
if !errors.Is(err, io.EOF) {
return errors.Wrap(err, "Bisector.Next")
}
break // io.EOF signals end of iteration
}
// We save the parts of the batch that have been verified successfully even though we don't know
// if all columns for the block will be available until the block is imported.
if err := s.verifyAndSave(s.columnsNotStored(columns)); err != nil {
iter.OnError(err)
continue
}
}
// This should give us a single error representing any unresolved errors seen via OnError.
return iter.Error()
}
// columnsNotStored filters the list of RODataColumn sidecars to only include those that are not found in the storage summary.
func (s *LazilyPersistentStoreColumn) columnsNotStored(sidecars []blocks.RODataColumn) []blocks.RODataColumn {
// We use this method to filter a set of sidecars that were previously seen to be missing from disk. Our base assumption
// is that they are still missing, so we usually don't need to touch the list. Instead we record any indices that are unexpectedly
// stored, and only when we find that the storage view has changed do we compact the slice in place.
stored := make(map[int]struct{}, 0)
lastRoot := [32]byte{}
var sum filesystem.DataColumnStorageSummary
for i, sc := range sidecars {
if sc.BlockRoot() != lastRoot {
sum = s.store.Summary(sc.BlockRoot())
lastRoot = sc.BlockRoot()
}
if sum.HasIndex(sc.Index) {
stored[i] = struct{}{}
}
}
// If the view on storage hasn't changed, return the original list.
if len(stored) == 0 {
return sidecars
}
shift := 0
for i := range sidecars {
if _, ok := stored[i]; ok {
// If the index is stored, skip and overwrite it.
// Track how many spaces down to shift unseen sidecars (to overwrite the previously shifted or seen).
shift++
continue
}
if shift > 0 {
// If the index is not stored and we have seen stored indices,
// we need to shift the current index down.
sidecars[i-shift] = sidecars[i]
}
}
return sidecars[:len(sidecars)-shift]
}
type custodyRequirement struct {
nodeID enode.ID
cgc uint64 // custody group count
indices peerdas.ColumnIndices
}
func (c *custodyRequirement) required(current primitives.Epoch) (peerdas.ColumnIndices, error) {
peerInfo, _, err := peerdas.Info(c.nodeID, max(c.cgc, params.BeaconConfig().SamplesPerSlot))
if err != nil {
return peerdas.NewColumnIndices(), errors.Wrap(err, "peer info")
}
return peerdas.NewColumnIndicesFromMap(peerInfo.CustodyColumns), nil
}
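
To make the intended Persist → IsDataAvailable flow concrete, here is a hedged sketch of how a batch-sync caller might drive this store. The helper name and its inputs (verifier constructor, bisector, downloaded columns, batch blocks) are illustrative placeholders, not part of this changeset:

package example

import (
	"context"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
	"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
	"github.com/ethereum/go-ethereum/p2p/enode"
)

// checkBatchDA is a hypothetical caller: it stashes the columns downloaded for a
// batch of blocks, then runs the DA check, which verifies, saves, and bisects on failure.
func checkBatchDA(
	ctx context.Context,
	store *filesystem.DataColumnStorage,
	newVerifier verification.NewDataColumnsVerifier,
	bisector das.Bisector,
	nodeID enode.ID,
	cgc uint64,
	current primitives.Slot,
	columns []blocks.RODataColumn,
	blks []blocks.ROBlock,
) error {
	avs := das.NewLazilyPersistentStoreColumn(store, newVerifier, nodeID, cgc, bisector)
	// Persist only stashes the sidecars in the in-memory cache; nothing is written to disk yet.
	if err := avs.Persist(current, columns...); err != nil {
		return err
	}
	// IsDataAvailable verifies and saves the columns required for each block,
	// falling back to the bisector if batch verification fails.
	return avs.IsDataAvailable(ctx, current, blks...)
}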

View File

@@ -0,0 +1,294 @@
package das
import (
"context"
"testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/encoding/bytesutil"
"github.com/OffchainLabs/prysm/v7/testing/require"
"github.com/OffchainLabs/prysm/v7/testing/util"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/ethereum/go-ethereum/p2p/enode"
)
var commitments = [][]byte{
bytesutil.PadTo([]byte("a"), 48),
bytesutil.PadTo([]byte("b"), 48),
bytesutil.PadTo([]byte("c"), 48),
bytesutil.PadTo([]byte("d"), 48),
}
func TestPersist(t *testing.T) {
t.Run("no sidecars", func(t *testing.T) {
dataColumnStorage := filesystem.NewEphemeralDataColumnStorage(t)
lazilyPersistentStoreColumns := NewLazilyPersistentStoreColumn(dataColumnStorage, nil, enode.ID{}, 0, nil)
err := lazilyPersistentStoreColumns.Persist(0)
require.NoError(t, err)
require.Equal(t, 0, len(lazilyPersistentStoreColumns.cache.entries))
})
t.Run("outside DA period", func(t *testing.T) {
dataColumnStorage := filesystem.NewEphemeralDataColumnStorage(t)
dataColumnParamsByBlockRoot := []util.DataColumnParam{
{Slot: 1, Index: 1},
}
roSidecars, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, dataColumnParamsByBlockRoot)
lazilyPersistentStoreColumns := NewLazilyPersistentStoreColumn(dataColumnStorage, nil, enode.ID{}, 0, nil)
err := lazilyPersistentStoreColumns.Persist(1_000_000, roSidecars...)
require.NoError(t, err)
require.Equal(t, 0, len(lazilyPersistentStoreColumns.cache.entries))
})
t.Run("nominal", func(t *testing.T) {
const slot = 42
store := filesystem.NewEphemeralDataColumnStorage(t)
dataColumnParamsByBlockRoot := []util.DataColumnParam{
{Slot: slot, Index: 1},
{Slot: slot, Index: 5},
}
roSidecars, roDataColumns := util.CreateTestVerifiedRoDataColumnSidecars(t, dataColumnParamsByBlockRoot)
avs := NewLazilyPersistentStoreColumn(store, nil, enode.ID{}, 0, nil)
err := avs.Persist(slot, roSidecars...)
require.NoError(t, err)
require.Equal(t, 1, len(avs.cache.entries))
key := cacheKey{slot: slot, root: roDataColumns[0].BlockRoot()}
entry, ok := avs.cache.entries[key]
require.Equal(t, true, ok)
summary := store.Summary(key.root)
// A call to Persist does NOT save the sidecars to disk.
require.Equal(t, uint64(0), summary.Count())
require.Equal(t, len(roSidecars), len(entry.scs))
idx1 := entry.scs[1]
require.NotNil(t, idx1)
require.DeepSSZEqual(t, roDataColumns[0].BlockRoot(), idx1.BlockRoot())
idx5 := entry.scs[5]
require.NotNil(t, idx5)
require.DeepSSZEqual(t, roDataColumns[1].BlockRoot(), idx5.BlockRoot())
for i, roDataColumn := range entry.scs {
if map[uint64]bool{1: true, 5: true}[i] {
continue
}
require.IsNil(t, roDataColumn)
}
})
}
func TestIsDataAvailable(t *testing.T) {
params.SetupTestConfigCleanup(t)
params.BeaconConfig().FuluForkEpoch = params.BeaconConfig().ElectraForkEpoch + 4096*2
newDataColumnsVerifier := func(dataColumnSidecars []blocks.RODataColumn, _ []verification.Requirement) verification.DataColumnsVerifier {
return &mockDataColumnsVerifier{t: t, dataColumnSidecars: dataColumnSidecars}
}
ctx := t.Context()
t.Run("without commitments", func(t *testing.T) {
signedBeaconBlockFulu := util.NewBeaconBlockFulu()
signedRoBlock := newSignedRoBlock(t, signedBeaconBlockFulu)
dataColumnStorage := filesystem.NewEphemeralDataColumnStorage(t)
lazilyPersistentStoreColumns := NewLazilyPersistentStoreColumn(dataColumnStorage, newDataColumnsVerifier, enode.ID{}, 0, nil)
err := lazilyPersistentStoreColumns.IsDataAvailable(ctx, 0 /*current slot*/, signedRoBlock)
require.NoError(t, err)
})
t.Run("with commitments", func(t *testing.T) {
signedBeaconBlockFulu := util.NewBeaconBlockFulu()
signedBeaconBlockFulu.Block.Slot = primitives.Slot(params.BeaconConfig().FuluForkEpoch) * params.BeaconConfig().SlotsPerEpoch
signedBeaconBlockFulu.Block.Body.BlobKzgCommitments = commitments
signedRoBlock := newSignedRoBlock(t, signedBeaconBlockFulu)
block := signedRoBlock.Block()
slot := block.Slot()
proposerIndex := block.ProposerIndex()
parentRoot := block.ParentRoot()
stateRoot := block.StateRoot()
bodyRoot, err := block.Body().HashTreeRoot()
require.NoError(t, err)
root := signedRoBlock.Root()
dataColumnStorage := filesystem.NewEphemeralDataColumnStorage(t)
lazilyPersistentStoreColumns := NewLazilyPersistentStoreColumn(dataColumnStorage, newDataColumnsVerifier, enode.ID{}, 0, nil)
indices := [...]uint64{1, 17, 19, 42, 75, 87, 102, 117}
dataColumnsParams := make([]util.DataColumnParam, 0, len(indices))
for _, index := range indices {
dataColumnParams := util.DataColumnParam{
Index: index,
KzgCommitments: commitments,
Slot: slot,
ProposerIndex: proposerIndex,
ParentRoot: parentRoot[:],
StateRoot: stateRoot[:],
BodyRoot: bodyRoot[:],
}
dataColumnsParams = append(dataColumnsParams, dataColumnParams)
}
_, verifiedRoDataColumns := util.CreateTestVerifiedRoDataColumnSidecars(t, dataColumnsParams)
key := keyFromBlock(signedRoBlock)
entry := lazilyPersistentStoreColumns.cache.entry(key)
defer lazilyPersistentStoreColumns.cache.delete(key)
for _, verifiedRoDataColumn := range verifiedRoDataColumns {
err := entry.stash(verifiedRoDataColumn.RODataColumn)
require.NoError(t, err)
}
err = lazilyPersistentStoreColumns.IsDataAvailable(ctx, slot, signedRoBlock)
require.NoError(t, err)
actual, err := dataColumnStorage.Get(root, indices[:])
require.NoError(t, err)
summary := dataColumnStorage.Summary(root)
require.Equal(t, uint64(len(indices)), summary.Count())
require.DeepSSZEqual(t, verifiedRoDataColumns, actual)
})
}
func TestFullCommitmentsToCheck(t *testing.T) {
windowSlots, err := slots.EpochEnd(params.BeaconConfig().MinEpochsForDataColumnSidecarsRequest)
require.NoError(t, err)
testCases := []struct {
name string
commitments [][]byte
block func(*testing.T) blocks.ROBlock
slot primitives.Slot
}{
{
name: "Pre-Fulu block",
block: func(t *testing.T) blocks.ROBlock {
return newSignedRoBlock(t, util.NewBeaconBlockElectra())
},
},
{
name: "Commitments outside data availability window",
block: func(t *testing.T) blocks.ROBlock {
beaconBlockElectra := util.NewBeaconBlockElectra()
// Block is from slot 0, "current slot" is window size +1 (so outside the window)
beaconBlockElectra.Block.Body.BlobKzgCommitments = commitments
return newSignedRoBlock(t, beaconBlockElectra)
},
slot: windowSlots + 1,
},
{
name: "Commitments within data availability window",
block: func(t *testing.T) blocks.ROBlock {
signedBeaconBlockFulu := util.NewBeaconBlockFulu()
signedBeaconBlockFulu.Block.Body.BlobKzgCommitments = commitments
signedBeaconBlockFulu.Block.Slot = 100
return newSignedRoBlock(t, signedBeaconBlockFulu)
},
commitments: commitments,
slot: 100,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
numberOfColumns := params.BeaconConfig().NumberOfColumns
b := tc.block(t)
s := NewLazilyPersistentStoreColumn(nil, nil, enode.ID{}, numberOfColumns, nil)
commitmentsArray, err := s.required(b, slots.ToEpoch(tc.slot))
require.NoError(t, err)
for _, commitments := range commitmentsArray {
require.DeepEqual(t, tc.commitments, commitments)
}
})
}
}
func newSignedRoBlock(t *testing.T, signedBeaconBlock interface{}) blocks.ROBlock {
sb, err := blocks.NewSignedBeaconBlock(signedBeaconBlock)
require.NoError(t, err)
rb, err := blocks.NewROBlock(sb)
require.NoError(t, err)
return rb
}
type mockDataColumnsVerifier struct {
t *testing.T
dataColumnSidecars []blocks.RODataColumn
validCalled, SidecarInclusionProvenCalled, SidecarKzgProofVerifiedCalled bool
}
var _ verification.DataColumnsVerifier = &mockDataColumnsVerifier{}
func (m *mockDataColumnsVerifier) VerifiedRODataColumns() ([]blocks.VerifiedRODataColumn, error) {
require.Equal(m.t, true, m.validCalled && m.SidecarInclusionProvenCalled && m.SidecarKzgProofVerifiedCalled)
verifiedDataColumnSidecars := make([]blocks.VerifiedRODataColumn, 0, len(m.dataColumnSidecars))
for _, dataColumnSidecar := range m.dataColumnSidecars {
verifiedDataColumnSidecar := blocks.NewVerifiedRODataColumn(dataColumnSidecar)
verifiedDataColumnSidecars = append(verifiedDataColumnSidecars, verifiedDataColumnSidecar)
}
return verifiedDataColumnSidecars, nil
}
func (m *mockDataColumnsVerifier) SatisfyRequirement(verification.Requirement) {}
func (m *mockDataColumnsVerifier) ValidFields() error {
m.validCalled = true
return nil
}
func (m *mockDataColumnsVerifier) CorrectSubnet(dataColumnSidecarSubTopic string, expectedTopics []string) error {
return nil
}
func (m *mockDataColumnsVerifier) NotFromFutureSlot() error { return nil }
func (m *mockDataColumnsVerifier) SlotAboveFinalized() error { return nil }
func (m *mockDataColumnsVerifier) ValidProposerSignature(ctx context.Context) error { return nil }
func (m *mockDataColumnsVerifier) SidecarParentSeen(parentSeen func([fieldparams.RootLength]byte) bool) error {
return nil
}
func (m *mockDataColumnsVerifier) SidecarParentValid(badParent func([fieldparams.RootLength]byte) bool) error {
return nil
}
func (m *mockDataColumnsVerifier) SidecarParentSlotLower() error { return nil }
func (m *mockDataColumnsVerifier) SidecarDescendsFromFinalized() error { return nil }
func (m *mockDataColumnsVerifier) SidecarInclusionProven() error {
m.SidecarInclusionProvenCalled = true
return nil
}
func (m *mockDataColumnsVerifier) SidecarKzgProofVerified() error {
m.SidecarKzgProofVerifiedCalled = true
return nil
}
func (m *mockDataColumnsVerifier) SidecarProposerExpected(ctx context.Context) error { return nil }

View File

@@ -0,0 +1,40 @@
package das
import (
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
)
// Bisector describes a type that takes a set of RODataColumns via the Bisect method
// and returns a BisectionIterator that yields batches of those columns to be
// verified together.
type Bisector interface {
// Bisect initializes the BisectionIterator and returns the result.
Bisect([]blocks.RODataColumn) (BisectionIterator, error)
}
// BisectionIterator describes an iterator that returns groups of columns to verify.
// It is up to the bisector implementation to decide how to chunk up the columns,
// whether by block, by peer, or any other strategy. For example, backfill implements
// a bisector that keeps track of the source of each sidecar by peer, and groups
// sidecars by peer in the Next method, enabling it to track which peers, out of all
// the peers contributing to a batch, gave us bad data.
// When a batch fails, the OnError method should be used so that the bisector can
// keep track of the failed groups of columns and, for example, apply that knowledge in peer scoring.
// The same column may be returned multiple times by Next: first as part of a larger batch,
// and again as part of a more fine-grained batch if there was an error in the larger batch.
// For example, first as part of a batch of all columns spanning peers, and then again
// as part of a batch of columns from a single peer if some column in the larger batch
// failed verification.
type BisectionIterator interface {
// Next returns the next group of columns to verify.
// When the iteration is complete, Next should return (nil, io.EOF).
Next() ([]blocks.RODataColumn, error)
// OnError should be called when verification of a group of columns obtained via Next() fails.
OnError(error)
// Error can be used at the end of the iteration to get a single error result. It will return
// nil if OnError was never called, or an error of the implementer's choosing representing the set
// of errors seen during iteration. For instance when bisecting from columns spanning peers to columns
// from a single peer, the broader error could be dropped, and then the more specific error
// (for a single peer's response) returned after bisecting to it.
Error() error
}
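
As an illustration of this contract only (the backfill implementation groups by peer, which is not shown here), the following sketch is a hypothetical Bisector that regroups a failed batch by block root and reports a single joined error at the end:

package example

import (
	"errors"
	"io"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
	"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
)

// blockBisector is a hypothetical Bisector that regroups a failed batch by block root.
type blockBisector struct{}

var _ das.Bisector = blockBisector{}

func (blockBisector) Bisect(cols []blocks.RODataColumn) (das.BisectionIterator, error) {
	groups := make(map[[32]byte][]blocks.RODataColumn)
	order := make([][32]byte, 0)
	for _, c := range cols {
		root := c.BlockRoot()
		if _, seen := groups[root]; !seen {
			order = append(order, root)
		}
		groups[root] = append(groups[root], c)
	}
	it := &blockIterator{}
	for _, root := range order {
		it.batches = append(it.batches, groups[root])
	}
	return it, nil
}

type blockIterator struct {
	batches [][]blocks.RODataColumn
	errs    []error
}

// Next pops the next per-block group, returning (nil, io.EOF) when exhausted.
func (it *blockIterator) Next() ([]blocks.RODataColumn, error) {
	if len(it.batches) == 0 {
		return nil, io.EOF
	}
	next := it.batches[0]
	it.batches = it.batches[1:]
	return next, nil
}

// OnError records a failed group so Error can report it after iteration.
func (it *blockIterator) OnError(err error) { it.errs = append(it.errs, err) }

// Error returns nil if every group verified, or a single joined error otherwise.
func (it *blockIterator) Error() error { return errors.Join(it.errs...) }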

View File

@@ -76,7 +76,7 @@ func (e *blobCacheEntry) stash(sc *blocks.ROBlob) error {
e.scs = make([]*blocks.ROBlob, maxBlobsPerBlock)
}
if e.scs[sc.Index] != nil {
return errors.Wrapf(ErrDuplicateSidecar, "root=%#x, index=%d, commitment=%#x", sc.BlockRoot(), sc.Index, sc.KzgCommitment)
return errors.Wrapf(errDuplicateSidecar, "root=%#x, index=%d, commitment=%#x", sc.BlockRoot(), sc.Index, sc.KzgCommitment)
}
e.scs[sc.Index] = sc
return nil

View File

@@ -1,9 +1,7 @@
package das
import (
"bytes"
"slices"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
@@ -11,9 +9,9 @@ import (
)
var (
ErrDuplicateSidecar = errors.New("duplicate sidecar stashed in AvailabilityStore")
errDuplicateSidecar = errors.New("duplicate sidecar stashed in AvailabilityStore")
errColumnIndexTooHigh = errors.New("column index too high")
errCommitmentMismatch = errors.New("KzgCommitment of sidecar in cache did not match block commitment")
errCommitmentMismatch = errors.New("commitment of sidecar in cache did not match block commitment")
errMissingSidecar = errors.New("no sidecar in cache for block commitment")
)
@@ -25,107 +23,80 @@ func newDataColumnCache() *dataColumnCache {
return &dataColumnCache{entries: make(map[cacheKey]*dataColumnCacheEntry)}
}
// ensure returns the entry for the given key, creating it if it isn't already present.
func (c *dataColumnCache) ensure(key cacheKey) *dataColumnCacheEntry {
// entry returns the entry for the given key, creating it if it isn't already present.
func (c *dataColumnCache) entry(key cacheKey) *dataColumnCacheEntry {
entry, ok := c.entries[key]
if !ok {
entry = &dataColumnCacheEntry{}
entry = newDataColumnCacheEntry(key.root)
c.entries[key] = entry
}
return entry
}
func (c *dataColumnCache) cleanup(blks []blocks.ROBlock) {
for _, block := range blks {
key := cacheKey{slot: block.Block().Slot(), root: block.Root()}
c.delete(key)
}
}
// delete removes the cache entry from the cache.
func (c *dataColumnCache) delete(key cacheKey) {
delete(c.entries, key)
}
// dataColumnCacheEntry holds a fixed-length cache of BlobSidecars.
type dataColumnCacheEntry struct {
scs [fieldparams.NumberOfColumns]*blocks.RODataColumn
diskSummary filesystem.DataColumnStorageSummary
func (c *dataColumnCache) stash(sc blocks.RODataColumn) error {
key := cacheKey{slot: sc.Slot(), root: sc.BlockRoot()}
entry := c.entry(key)
return entry.stash(sc)
}
func (e *dataColumnCacheEntry) setDiskSummary(sum filesystem.DataColumnStorageSummary) {
e.diskSummary = sum
func newDataColumnCacheEntry(root [32]byte) *dataColumnCacheEntry {
return &dataColumnCacheEntry{scs: make(map[uint64]blocks.RODataColumn), root: &root}
}
// dataColumnCacheEntry is the set of RODataColumns for a given block.
type dataColumnCacheEntry struct {
root *[32]byte
scs map[uint64]blocks.RODataColumn
}
// stash adds an item to the in-memory cache of DataColumnSidecars.
// Only the first DataColumnSidecar of a given Index will be kept in the cache.
// stash will return an error if the given data colunn is already in the cache, or if the Index is out of bounds.
func (e *dataColumnCacheEntry) stash(sc *blocks.RODataColumn) error {
// stash will return an error if the given data column Index is out of bounds.
// It will overwrite any existing entry for the same index.
func (e *dataColumnCacheEntry) stash(sc blocks.RODataColumn) error {
if sc.Index >= fieldparams.NumberOfColumns {
return errors.Wrapf(errColumnIndexTooHigh, "index=%d", sc.Index)
}
if e.scs[sc.Index] != nil {
return errors.Wrapf(ErrDuplicateSidecar, "root=%#x, index=%d, commitment=%#x", sc.BlockRoot(), sc.Index, sc.KzgCommitments)
}
e.scs[sc.Index] = sc
return nil
}
func (e *dataColumnCacheEntry) filter(root [32]byte, commitmentsArray *safeCommitmentsArray) ([]blocks.RODataColumn, error) {
nonEmptyIndices := commitmentsArray.nonEmptyIndices()
if e.diskSummary.AllAvailable(nonEmptyIndices) {
return nil, nil
// append appends the cached sidecars for the requested indices to the given sidecars slice and returns the result.
// If any of the requested indices are missing from the cache, an error is returned and the sidecars slice is unchanged.
func (e *dataColumnCacheEntry) append(sidecars []blocks.RODataColumn, indices peerdas.ColumnIndices) ([]blocks.RODataColumn, error) {
needed := indices.ToMap()
for col := range needed {
_, ok := e.scs[col]
if !ok {
return nil, errors.Wrapf(errMissingSidecar, "root=%#x, index=%#x", e.root, col)
}
}
commitmentsCount := commitmentsArray.count()
sidecars := make([]blocks.RODataColumn, 0, commitmentsCount)
for i := range nonEmptyIndices {
if e.diskSummary.HasIndex(i) {
continue
}
if e.scs[i] == nil {
return nil, errors.Wrapf(errMissingSidecar, "root=%#x, index=%#x", root, i)
}
if !sliceBytesEqual(commitmentsArray[i], e.scs[i].KzgCommitments) {
return nil, errors.Wrapf(errCommitmentMismatch, "root=%#x, index=%#x, commitment=%#x, block commitment=%#x", root, i, e.scs[i].KzgCommitments, commitmentsArray[i])
}
sidecars = append(sidecars, *e.scs[i])
// Loop twice so we can avoid touching the slice if any of the columns are missing.
for col := range needed {
sidecars = append(sidecars, e.scs[col])
}
return sidecars, nil
}
// safeCommitmentsArray is a fixed size array of commitments.
// This is helpful for avoiding gratuitous bounds checks.
type safeCommitmentsArray [fieldparams.NumberOfColumns][][]byte
// count returns the number of commitments in the array.
func (s *safeCommitmentsArray) count() int {
count := 0
for i := range s {
if s[i] != nil {
count++
// IndicesNotStored filters the list of indices to only include those that are not found in the storage summary.
func IndicesNotStored(sum filesystem.DataColumnStorageSummary, indices peerdas.ColumnIndices) peerdas.ColumnIndices {
indices = indices.Copy()
for col := range indices {
if sum.HasIndex(col) {
indices.Unset(col)
}
}
return count
}
// nonEmptyIndices returns a map of indices that are non-nil in the array.
func (s *safeCommitmentsArray) nonEmptyIndices() map[uint64]bool {
columns := make(map[uint64]bool)
for i := range s {
if s[i] != nil {
columns[uint64(i)] = true
}
}
return columns
}
func sliceBytesEqual(a, b [][]byte) bool {
return slices.EqualFunc(a, b, bytes.Equal)
return indices
}

View File

@@ -1,8 +1,10 @@
package das
import (
"slices"
"testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
@@ -13,124 +15,105 @@ import (
func TestEnsureDeleteSetDiskSummary(t *testing.T) {
c := newDataColumnCache()
key := cacheKey{}
entry := c.ensure(key)
require.DeepEqual(t, dataColumnCacheEntry{}, *entry)
entry := c.entry(key)
require.Equal(t, 0, len(entry.scs))
diskSummary := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{true})
entry.setDiskSummary(diskSummary)
entry = c.ensure(key)
require.DeepEqual(t, dataColumnCacheEntry{diskSummary: diskSummary}, *entry)
nonDupe := c.entry(key)
require.Equal(t, entry, nonDupe) // same pointer
expect, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 1}})
require.NoError(t, entry.stash(expect[0]))
require.Equal(t, 1, len(entry.scs))
cols, err := nonDupe.append([]blocks.RODataColumn{}, peerdas.NewColumnIndicesFromSlice([]uint64{expect[0].Index}))
require.NoError(t, err)
require.DeepEqual(t, expect[0], cols[0])
c.delete(key)
entry = c.ensure(key)
require.DeepEqual(t, dataColumnCacheEntry{}, *entry)
entry = c.entry(key)
require.Equal(t, 0, len(entry.scs))
require.NotEqual(t, entry, nonDupe) // different pointer
}
func TestStash(t *testing.T) {
t.Run("Index too high", func(t *testing.T) {
roDataColumns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 10_000}})
columns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 10_000}})
var entry dataColumnCacheEntry
err := entry.stash(&roDataColumns[0])
err := entry.stash(columns[0])
require.NotNil(t, err)
})
t.Run("Nominal and already existing", func(t *testing.T) {
roDataColumns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 1}})
var entry dataColumnCacheEntry
err := entry.stash(&roDataColumns[0])
entry := newDataColumnCacheEntry(roDataColumns[0].BlockRoot())
err := entry.stash(roDataColumns[0])
require.NoError(t, err)
require.DeepEqual(t, roDataColumns[0], entry.scs[1])
err = entry.stash(&roDataColumns[0])
require.NotNil(t, err)
require.NoError(t, entry.stash(roDataColumns[0]))
// stash simply replaces duplicate values now
require.DeepEqual(t, roDataColumns[0], entry.scs[1])
})
}
func TestFilterDataColumns(t *testing.T) {
func TestAppendDataColumns(t *testing.T) {
t.Run("All available", func(t *testing.T) {
commitmentsArray := safeCommitmentsArray{nil, [][]byte{[]byte{1}}, nil, [][]byte{[]byte{3}}}
diskSummary := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{false, true, false, true})
dataColumnCacheEntry := dataColumnCacheEntry{diskSummary: diskSummary}
actual, err := dataColumnCacheEntry.filter([fieldparams.RootLength]byte{}, &commitmentsArray)
sum := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{false, true, false, true})
notStored := IndicesNotStored(sum, peerdas.NewColumnIndicesFromSlice([]uint64{1, 3}))
actual, err := newDataColumnCacheEntry([32]byte{}).append([]blocks.RODataColumn{}, notStored)
require.NoError(t, err)
require.IsNil(t, actual)
require.Equal(t, 0, len(actual))
})
t.Run("Some scs missing", func(t *testing.T) {
commitmentsArray := safeCommitmentsArray{nil, [][]byte{[]byte{1}}}
sum := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{})
diskSummary := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{})
dataColumnCacheEntry := dataColumnCacheEntry{diskSummary: diskSummary}
_, err := dataColumnCacheEntry.filter([fieldparams.RootLength]byte{}, &commitmentsArray)
require.NotNil(t, err)
})
t.Run("Commitments not equal", func(t *testing.T) {
commitmentsArray := safeCommitmentsArray{nil, [][]byte{[]byte{1}}}
roDataColumns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 1}})
var scs [fieldparams.NumberOfColumns]*blocks.RODataColumn
scs[1] = &roDataColumns[0]
dataColumnCacheEntry := dataColumnCacheEntry{scs: scs}
_, err := dataColumnCacheEntry.filter(roDataColumns[0].BlockRoot(), &commitmentsArray)
notStored := IndicesNotStored(sum, peerdas.NewColumnIndicesFromSlice([]uint64{1}))
actual, err := newDataColumnCacheEntry([32]byte{}).append([]blocks.RODataColumn{}, notStored)
require.Equal(t, 0, len(actual))
require.NotNil(t, err)
})
t.Run("Nominal", func(t *testing.T) {
commitmentsArray := safeCommitmentsArray{nil, [][]byte{[]byte{1}}, nil, [][]byte{[]byte{3}}}
diskSummary := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{false, true})
indices := peerdas.NewColumnIndicesFromSlice([]uint64{1, 3})
expected, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{{Index: 3, KzgCommitments: [][]byte{[]byte{3}}}})
var scs [fieldparams.NumberOfColumns]*blocks.RODataColumn
scs[3] = &expected[0]
scs := map[uint64]blocks.RODataColumn{
3: expected[0],
}
sum := filesystem.NewDataColumnStorageSummary(42, [fieldparams.NumberOfColumns]bool{false, true})
entry := dataColumnCacheEntry{scs: scs}
dataColumnCacheEntry := dataColumnCacheEntry{scs: scs, diskSummary: diskSummary}
actual, err := dataColumnCacheEntry.filter(expected[0].BlockRoot(), &commitmentsArray)
actual, err := entry.append([]blocks.RODataColumn{}, IndicesNotStored(sum, indices))
require.NoError(t, err)
require.DeepEqual(t, expected, actual)
})
}
func TestCount(t *testing.T) {
s := safeCommitmentsArray{nil, [][]byte{[]byte{1}}, nil, [][]byte{[]byte{3}}}
require.Equal(t, 2, s.count())
}
t.Run("Append does not mutate the input", func(t *testing.T) {
indices := peerdas.NewColumnIndicesFromSlice([]uint64{1, 2})
expected, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{
{Index: 0, KzgCommitments: [][]byte{[]byte{1}}},
{Index: 1, KzgCommitments: [][]byte{[]byte{2}}},
{Index: 2, KzgCommitments: [][]byte{[]byte{3}}},
})
func TestNonEmptyIndices(t *testing.T) {
s := safeCommitmentsArray{nil, [][]byte{[]byte{10}}, nil, [][]byte{[]byte{20}}}
actual := s.nonEmptyIndices()
require.DeepEqual(t, map[uint64]bool{1: true, 3: true}, actual)
}
scs := map[uint64]blocks.RODataColumn{
1: expected[1],
2: expected[2],
}
entry := dataColumnCacheEntry{scs: scs}
func TestSliceBytesEqual(t *testing.T) {
t.Run("Different lengths", func(t *testing.T) {
a := [][]byte{[]byte{1, 2, 3}}
b := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 6}}
require.Equal(t, false, sliceBytesEqual(a, b))
})
t.Run("Same length but different content", func(t *testing.T) {
a := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 6}}
b := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 7}}
require.Equal(t, false, sliceBytesEqual(a, b))
})
t.Run("Equal slices", func(t *testing.T) {
a := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 6}}
b := [][]byte{[]byte{1, 2, 3}, []byte{4, 5, 6}}
require.Equal(t, true, sliceBytesEqual(a, b))
original := []blocks.RODataColumn{expected[0]}
actual, err := entry.append(original, indices)
require.NoError(t, err)
require.Equal(t, len(expected), len(actual))
slices.SortFunc(actual, func(i, j blocks.RODataColumn) int {
return int(i.Index) - int(j.Index)
})
for i := range expected {
require.Equal(t, expected[i].Index, actual[i].Index)
}
require.Equal(t, 1, len(original))
})
}

View File

@@ -7,13 +7,10 @@ import (
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
)
// AvailabilityStore describes a component that can verify and save sidecars for a given block, and confirm previously
// verified and saved sidecars.
// Persist guarantees that the sidecar will be available to perform a DA check
// for the life of the beacon node process.
// IsDataAvailable guarantees that all blobs committed to in the block have been
// durably persisted before returning a non-error value.
type AvailabilityStore interface {
IsDataAvailable(ctx context.Context, current primitives.Slot, b blocks.ROBlock) error
Persist(current primitives.Slot, blobSidecar ...blocks.ROBlob) error
// AvailabilityChecker is the minimum interface needed to check if data is available for a block.
// By convention there is a concept of an AvailabilityStore that implements a method to persist
// blobs or data columns to prepare for availability checking, but since those methods are different
// for different forms of blob data, they are not included in the interface.
type AvailabilityChecker interface {
IsDataAvailable(ctx context.Context, current primitives.Slot, b ...blocks.ROBlock) error
}
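
A short sketch of why the narrower interface helps: a consumer that only performs the DA check can accept either the blob or the column store, since both satisfy AvailabilityChecker. The consumer function below is hypothetical:

package example

import (
	"context"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
	"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
)

// importBatch only needs the DA check, so it depends on the minimal AvailabilityChecker
// rather than a concrete blob or column store; either store type can be passed in.
func importBatch(ctx context.Context, avs das.AvailabilityChecker, current primitives.Slot, blks []blocks.ROBlock) error {
	return avs.IsDataAvailable(ctx, current, blks...)
}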

beacon-chain/das/log.go (new file)
View File

@@ -0,0 +1,5 @@
package das
import "github.com/sirupsen/logrus"
var log = logrus.WithField("prefix", "das")

View File

@@ -9,16 +9,20 @@ import (
// MockAvailabilityStore is an implementation of AvailabilityStore that can be used by other packages in tests.
type MockAvailabilityStore struct {
VerifyAvailabilityCallback func(ctx context.Context, current primitives.Slot, b blocks.ROBlock) error
VerifyAvailabilityCallback func(ctx context.Context, current primitives.Slot, b ...blocks.ROBlock) error
ErrIsDataAvailable error
PersistBlobsCallback func(current primitives.Slot, blobSidecar ...blocks.ROBlob) error
}
var _ AvailabilityStore = &MockAvailabilityStore{}
var _ AvailabilityChecker = &MockAvailabilityStore{}
// IsDataAvailable satisfies the corresponding method of the AvailabilityStore interface in a way that is useful for tests.
func (m *MockAvailabilityStore) IsDataAvailable(ctx context.Context, current primitives.Slot, b blocks.ROBlock) error {
func (m *MockAvailabilityStore) IsDataAvailable(ctx context.Context, current primitives.Slot, b ...blocks.ROBlock) error {
if m.ErrIsDataAvailable != nil {
return m.ErrIsDataAvailable
}
if m.VerifyAvailabilityCallback != nil {
return m.VerifyAvailabilityCallback(ctx, current, b)
return m.VerifyAvailabilityCallback(ctx, current, b...)
}
return nil
}

View File

@@ -1124,7 +1124,7 @@ func (b *BeaconNode) registerPrunerService(cliCtx *cli.Context) error {
func (b *BeaconNode) RegisterBackfillService(cliCtx *cli.Context, bfs *backfill.Store) error {
pa := peers.NewAssigner(b.fetchP2P().Peers(), b.forkChoicer)
bf, err := backfill.NewService(cliCtx.Context, bfs, b.BlobStorage, b.clockWaiter, b.fetchP2P(), pa, b.BackfillOpts...)
bf, err := backfill.NewService(cliCtx.Context, bfs, b.BlobStorage, b.DataColumnStorage, b.clockWaiter, b.fetchP2P(), pa, b.BackfillOpts...)
if err != nil {
return errors.Wrap(err, "error initializing backfill service")
}

View File

@@ -142,6 +142,7 @@ go_test(
"topics_test.go",
"utils_test.go",
],
data = glob(["testdata/**"]),
embed = [":go_default_library"],
flaky = True,
tags = ["requires-network"],

View File

@@ -42,7 +42,7 @@ func (a *Assigner) freshPeers() ([]peer.ID, error) {
if flags.Get().MinimumSyncPeers < required {
required = flags.Get().MinimumSyncPeers
}
_, peers := a.ps.BestFinalized(params.BeaconConfig().MaxPeersToSync, a.fc.FinalizedCheckpoint().Epoch)
_, peers := a.ps.BestFinalized(a.fc.FinalizedCheckpoint().Epoch)
if len(peers) < required {
log.WithFields(logrus.Fields{
"suitable": len(peers),
@@ -52,27 +52,31 @@ func (a *Assigner) freshPeers() ([]peer.ID, error) {
return peers, nil
}
// AssignmentFilter describes a function that takes a list of peer.IDs and returns a filtered subset.
// An example is the NotBusy filter.
type AssignmentFilter func([]peer.ID) []peer.ID
// Assign uses the "BestFinalized" method to select the best peers that agree on a canonical block
// for the configured finalized epoch. At most `n` peers will be returned. The `busy` param can be used
// to filter out peers that we know we don't want to connect to, for instance if we are trying to limit
// the number of outbound requests to each peer from a given component.
func (a *Assigner) Assign(busy map[peer.ID]bool, n int) ([]peer.ID, error) {
func (a *Assigner) Assign(filter AssignmentFilter) ([]peer.ID, error) {
best, err := a.freshPeers()
if err != nil {
return nil, err
}
return pickBest(busy, n, best), nil
return filter(best), nil
}
func pickBest(busy map[peer.ID]bool, n int, best []peer.ID) []peer.ID {
ps := make([]peer.ID, 0, n)
for _, p := range best {
if len(ps) == n {
return ps
}
if !busy[p] {
ps = append(ps, p)
// NotBusy is a filter that returns the list of peer.IDs that are not in the `busy` map.
func NotBusy(busy map[peer.ID]bool) AssignmentFilter {
return func(peers []peer.ID) []peer.ID {
ps := make([]peer.ID, 0, len(peers))
for _, p := range peers {
if !busy[p] {
ps = append(ps, p)
}
}
return ps
}
return ps
}
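
For clarity, a hedged usage sketch of the new filter-based Assign API. The import paths for the peers and libp2p packages are assumed from the patterns used elsewhere in this diff, and the busy map is supplied by the caller:

package example

import (
	"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/peers"
	"github.com/libp2p/go-libp2p/core/peer"
)

// assignIdlePeers asks the Assigner for suitable peers, excluding any the caller
// already has requests in flight to. The old (busy, n) parameters are replaced by
// a filter; callers that still need a cap apply it to the returned slice themselves.
func assignIdlePeers(a *peers.Assigner, busy map[peer.ID]bool) ([]peer.ID, error) {
	return a.Assign(peers.NotBusy(busy))
}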

View File

@@ -13,82 +13,68 @@ func TestPickBest(t *testing.T) {
cases := []struct {
name string
busy map[peer.ID]bool
n int
best []peer.ID
expected []peer.ID
}{
{
name: "",
n: 0,
name: "don't limit",
expected: best,
},
{
name: "none busy",
n: 1,
expected: best[0:1],
expected: best,
},
{
name: "all busy except last",
n: 1,
busy: testBusyMap(best[0 : len(best)-1]),
expected: best[len(best)-1:],
},
{
name: "all busy except i=5",
n: 1,
busy: testBusyMap(append(append([]peer.ID{}, best[0:5]...), best[6:]...)),
expected: []peer.ID{best[5]},
},
{
name: "all busy - 0 results",
n: 1,
busy: testBusyMap(best),
},
{
name: "first half busy",
n: 5,
busy: testBusyMap(best[0:5]),
expected: best[5:],
},
{
name: "back half busy",
n: 5,
busy: testBusyMap(best[5:]),
expected: best[0:5],
},
{
name: "pick all ",
n: 10,
expected: best,
},
{
name: "none available",
n: 10,
best: []peer.ID{},
},
{
name: "not enough",
n: 10,
best: best[0:1],
expected: best[0:1],
},
{
name: "not enough, some busy",
n: 10,
best: best[0:6],
busy: testBusyMap(best[0:5]),
expected: best[5:6],
},
}
for _, c := range cases {
name := fmt.Sprintf("n=%d", c.n)
if c.name != "" {
name += " " + c.name
}
t.Run(name, func(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
if c.best == nil {
c.best = best
}
pb := pickBest(c.busy, c.n, c.best)
filt := NotBusy(c.busy)
pb := filt(c.best)
require.Equal(t, len(c.expected), len(pb))
for i := range c.expected {
require.Equal(t, c.expected[i], pb[i])

View File

@@ -710,76 +710,54 @@ func (p *Status) deprecatedPrune() {
p.tallyIPTracker()
}
// BestFinalized returns the highest finalized epoch equal to or higher than `ourFinalizedEpoch`
// that is agreed upon by the majority of peers, and the peers agreeing on this finalized epoch.
// This method may not return the absolute highest finalized epoch, but the finalized epoch in which
// most peers can serve blocks (plurality voting). Ideally, all peers would be reporting the same
// finalized epoch but some may be behind due to their own latency, or because of their finalized
// epoch at the time we queried them.
func (p *Status) BestFinalized(maxPeers int, ourFinalizedEpoch primitives.Epoch) (primitives.Epoch, []peer.ID) {
// Retrieve all connected peers.
// BestFinalized groups all peers by their last known finalized epoch
// and selects the epoch of the largest group as best.
// Any peer with a finalized epoch < ourFinalized is excluded from consideration.
// In the event of a tie in largest group size, the higher epoch is the tie breaker.
// The selected epoch is returned, along with a list of peers with a finalized epoch >= the selected epoch.
func (p *Status) BestFinalized(ourFinalized primitives.Epoch) (primitives.Epoch, []peer.ID) {
connected := p.Connected()
pids := make([]peer.ID, 0, len(connected))
views := make(map[peer.ID]*pb.StatusV2, len(connected))
// key: finalized epoch, value: number of peers that support this finalized epoch.
finalizedEpochVotes := make(map[primitives.Epoch]uint64)
// key: peer ID, value: finalized epoch of the peer.
pidEpoch := make(map[peer.ID]primitives.Epoch, len(connected))
// key: peer ID, value: head slot of the peer.
pidHead := make(map[peer.ID]primitives.Slot, len(connected))
potentialPIDs := make([]peer.ID, 0, len(connected))
votes := make(map[primitives.Epoch]uint64)
winner := primitives.Epoch(0)
for _, pid := range connected {
peerChainState, err := p.ChainState(pid)
// Skip if the peer's finalized epoch is not defined, or if the peer's finalized epoch is
// lower than ours.
if err != nil || peerChainState == nil || peerChainState.FinalizedEpoch < ourFinalizedEpoch {
view, err := p.ChainState(pid)
if err != nil || view == nil || view.FinalizedEpoch < ourFinalized {
continue
}
pids = append(pids, pid)
views[pid] = view
finalizedEpochVotes[peerChainState.FinalizedEpoch]++
pidEpoch[pid] = peerChainState.FinalizedEpoch
pidHead[pid] = peerChainState.HeadSlot
potentialPIDs = append(potentialPIDs, pid)
}
// Select the target epoch, which is the epoch most peers agree upon.
// If there is a tie, select the highest epoch.
targetEpoch, mostVotes := primitives.Epoch(0), uint64(0)
for epoch, count := range finalizedEpochVotes {
if count > mostVotes || (count == mostVotes && epoch > targetEpoch) {
mostVotes = count
targetEpoch = epoch
votes[view.FinalizedEpoch]++
if winner == 0 {
winner = view.FinalizedEpoch
continue
}
e, v := view.FinalizedEpoch, votes[view.FinalizedEpoch]
if v > votes[winner] || v == votes[winner] && e > winner {
winner = e
}
}
// Sort PIDs by finalized (epoch, head), in decreasing order.
sort.Slice(potentialPIDs, func(i, j int) bool {
if pidEpoch[potentialPIDs[i]] == pidEpoch[potentialPIDs[j]] {
return pidHead[potentialPIDs[i]] > pidHead[potentialPIDs[j]]
// Descending sort by (finalized, head).
sort.Slice(pids, func(i, j int) bool {
iv, jv := views[pids[i]], views[pids[j]]
if iv.FinalizedEpoch == jv.FinalizedEpoch {
return iv.HeadSlot > jv.HeadSlot
}
return pidEpoch[potentialPIDs[i]] > pidEpoch[potentialPIDs[j]]
return iv.FinalizedEpoch > jv.FinalizedEpoch
})
// Trim potential peers to those on or after target epoch.
for i, pid := range potentialPIDs {
if pidEpoch[pid] < targetEpoch {
potentialPIDs = potentialPIDs[:i]
break
}
}
// Find the first peer with finalized epoch < winner, then trim it and all following (lower) peers.
trim := sort.Search(len(pids), func(i int) bool {
return views[pids[i]].FinalizedEpoch < winner
})
pids = pids[:trim]
// Trim potential peers to at most maxPeers.
if len(potentialPIDs) > maxPeers {
potentialPIDs = potentialPIDs[:maxPeers]
}
return targetEpoch, potentialPIDs
return winner, pids
}
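With the maxPeers parameter removed, a caller that still wants a cap trims the sorted result itself, as the updated tests below do. A minimal sketch, assuming it sits alongside Status so the package's existing primitives/peer imports apply; the helper name is hypothetical:

// bestPeersCapped is a hypothetical helper: BestFinalized returns every qualifying
// peer sorted by (finalized epoch, head slot) descending, so the caller trims to its own limit.
func bestPeersCapped(p *Status, ourFinalized primitives.Epoch, maxPeers int) (primitives.Epoch, []peer.ID) {
	epoch, pids := p.BestFinalized(ourFinalized)
	if len(pids) > maxPeers {
		pids = pids[:maxPeers]
	}
	return epoch, pids
}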
// BestNonFinalized returns the highest known epoch, higher than ours,

View File

@@ -654,9 +654,10 @@ func TestTrimmedOrderedPeers(t *testing.T) {
FinalizedRoot: mockroot2[:],
})
target, pids := p.BestFinalized(maxPeers, 0)
target, pids := p.BestFinalized(0)
assert.Equal(t, expectedTarget, target, "Incorrect target epoch retrieved")
assert.Equal(t, maxPeers, len(pids), "Incorrect number of peers retrieved")
// addPeer called 5 times above
assert.Equal(t, 5, len(pids), "Incorrect number of peers retrieved")
// Expect the returned list to be ordered by finalized epoch and trimmed to max peers.
assert.Equal(t, pid3, pids[0], "Incorrect first peer")
@@ -1017,7 +1018,10 @@ func TestStatus_BestPeer(t *testing.T) {
HeadSlot: peerConfig.headSlot,
})
}
epoch, pids := p.BestFinalized(tt.limitPeers, tt.ourFinalizedEpoch)
epoch, pids := p.BestFinalized(tt.ourFinalizedEpoch)
if len(pids) > tt.limitPeers {
pids = pids[:tt.limitPeers]
}
assert.Equal(t, tt.targetEpoch, epoch, "Unexpected epoch retrieved")
assert.Equal(t, tt.targetEpochSupport, len(pids), "Unexpected number of peers supporting retrieved epoch")
})
@@ -1044,7 +1048,10 @@ func TestBestFinalized_returnsMaxValue(t *testing.T) {
})
}
_, pids := p.BestFinalized(maxPeers, 0)
_, pids := p.BestFinalized(0)
if len(pids) > maxPeers {
pids = pids[:maxPeers]
}
assert.Equal(t, maxPeers, len(pids), "Wrong number of peers returned")
}

View File

@@ -7,6 +7,7 @@ go_library(
"block_batcher.go",
"context.go",
"custody.go",
"data_column_assignment.go",
"data_column_sidecars.go",
"data_columns_reconstruct.go",
"deadlines.go",
@@ -135,6 +136,7 @@ go_library(
"//time:go_default_library",
"//time/slots:go_default_library",
"@com_github_ethereum_go_ethereum//common/hexutil:go_default_library",
"@com_github_ethereum_go_ethereum//p2p/enode:go_default_library",
"@com_github_hashicorp_golang_lru//:go_default_library",
"@com_github_libp2p_go_libp2p//core:go_default_library",
"@com_github_libp2p_go_libp2p//core/host:go_default_library",

View File

@@ -6,17 +6,22 @@ go_library(
"batch.go",
"batcher.go",
"blobs.go",
"columns.go",
"error.go",
"fulu_transition.go",
"log.go",
"metrics.go",
"pool.go",
"service.go",
"status.go",
"verify.go",
"verify_column.go",
"worker.go",
],
importpath = "github.com/OffchainLabs/prysm/v7/beacon-chain/sync/backfill",
visibility = ["//visibility:public"],
deps = [
"//beacon-chain/core/peerdas:go_default_library",
"//beacon-chain/core/signing:go_default_library",
"//beacon-chain/das:go_default_library",
"//beacon-chain/db:go_default_library",
@@ -53,17 +58,23 @@ go_test(
"batch_test.go",
"batcher_test.go",
"blobs_test.go",
"columns_test.go",
"fulu_transition_test.go",
"log_test.go",
"pool_test.go",
"service_test.go",
"status_test.go",
"verify_column_test.go",
"verify_test.go",
],
embed = [":go_default_library"],
deps = [
"//beacon-chain/core/peerdas:go_default_library",
"//beacon-chain/core/signing:go_default_library",
"//beacon-chain/das:go_default_library",
"//beacon-chain/db:go_default_library",
"//beacon-chain/db/filesystem:go_default_library",
"//beacon-chain/p2p/peers:go_default_library",
"//beacon-chain/p2p/testing:go_default_library",
"//beacon-chain/startup:go_default_library",
"//beacon-chain/state:go_default_library",
@@ -85,5 +96,7 @@ go_test(
"@com_github_ethereum_go_ethereum//common/hexutil:go_default_library",
"@com_github_libp2p_go_libp2p//core/peer:go_default_library",
"@com_github_pkg_errors//:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library",
"@com_github_stretchr_testify//require:go_default_library",
],
)

View File

@@ -6,9 +6,7 @@ import (
"sort"
"time"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
eth "github.com/OffchainLabs/prysm/v7/proto/prysm/v1alpha1"
"github.com/libp2p/go-libp2p/core/peer"
@@ -16,9 +14,7 @@ import (
"github.com/sirupsen/logrus"
)
// errChainBroken indicates a backfill batch can't be imported to the db because it is not known to be the ancestor
// of the canonical chain.
var ErrChainBroken = errors.New("batch is not the ancestor of a known finalized root")
var errChainBroken = errors.New("batch is not the ancestor of a known finalized root")
type batchState int
@@ -30,16 +26,20 @@ func (s batchState) String() string {
return "init"
case batchSequenced:
return "sequenced"
case batchErrRetryable:
return "error_retryable"
case batchSyncBlobs:
return "sync_blobs"
case batchSyncColumns:
return "sync_columns"
case batchImportable:
return "importable"
case batchImportComplete:
return "import_complete"
case batchEndSequence:
return "end_sequence"
case batchBlobSync:
return "blob_sync"
case batchErrRetryable:
return "error_retryable"
case batchErrFatal:
return "error_fatal"
default:
return "unknown"
}
@@ -49,10 +49,12 @@ const (
batchNil batchState = iota
batchInit
batchSequenced
batchErrRetryable
batchBlobSync
batchSyncBlobs
batchSyncColumns
batchImportable
batchImportComplete
batchErrRetryable
batchErrFatal // if this is received in the main loop, the worker pool will be shut down.
batchEndSequence
)
@@ -67,14 +69,17 @@ type batch struct {
retries int
retryAfter time.Time
begin primitives.Slot
end primitives.Slot // half-open interval, [begin, end), ie >= start, < end.
results verifiedROBlocks
end primitives.Slot // half-open interval, [begin, end), ie >= begin, < end.
blocks verifiedROBlocks
err error
state batchState
busy peer.ID
blockPid peer.ID
blobPid peer.ID
bs *blobSync
// `assignedPeer` is used by the worker pool to assign and unassign peer.IDs to serve requests for the current batch state.
// Depending on the state it will be copied to blockPeer, columns.peer, or blobs.peer.
assignedPeer peer.ID
blockPeer peer.ID
nextReqCols []uint64
blobs *blobSync
columns *columnSync
}
func (b batch) logFields() logrus.Fields {
@@ -86,16 +91,29 @@ func (b batch) logFields() logrus.Fields {
"retries": b.retries,
"begin": b.begin,
"end": b.end,
"busyPid": b.busy,
"blockPid": b.blockPid,
"blobPid": b.blobPid,
"busyPid": b.assignedPeer,
"blockPid": b.blockPeer,
}
if b.blobs != nil {
f["blobPid"] = b.blobs.peer
}
if b.columns != nil {
f["colPid"] = b.columns.peer
}
if b.retries > 0 {
f["retryAfter"] = b.retryAfter.String()
}
if b.state == batchSyncColumns {
f["nextColumns"] = fmt.Sprintf("%v", b.nextReqCols)
}
if b.state == batchErrRetryable && b.blobs != nil {
f["blobsMissing"] = b.blobs.needed()
}
return f
}
// replaces returns true if `r` is a version of `b` that has been updated by a worker,
// meaning it should replace `b` in the batch sequencing queue.
func (b batch) replaces(r batch) bool {
if r.state == batchImportComplete {
return false
@@ -114,9 +132,9 @@ func (b batch) id() batchId {
}
func (b batch) ensureParent(expected [32]byte) error {
tail := b.results[len(b.results)-1]
tail := b.blocks[len(b.blocks)-1]
if tail.Root() != expected {
return errors.Wrapf(ErrChainBroken, "last parent_root=%#x, tail root=%#x", expected, tail.Root())
return errors.Wrapf(errChainBroken, "last parent_root=%#x, tail root=%#x", expected, tail.Root())
}
return nil
}
@@ -136,21 +154,15 @@ func (b batch) blobRequest() *eth.BlobSidecarsByRangeRequest {
}
}
func (b batch) withResults(results verifiedROBlocks, bs *blobSync) batch {
b.results = results
b.bs = bs
if bs.blobsNeeded() > 0 {
return b.withState(batchBlobSync)
func (b batch) transitionToNext() batch {
if len(b.blocks) == 0 {
return b.withState(batchSequenced)
}
return b.withState(batchImportable)
}
func (b batch) postBlobSync() batch {
if b.blobsNeeded() > 0 {
log.WithFields(b.logFields()).WithField("blobsMissing", b.blobsNeeded()).Error("Batch still missing blobs after downloading from peer")
b.bs = nil
b.results = []blocks.ROBlock{}
return b.withState(batchErrRetryable)
if len(b.columns.columnsNeeded()) > 0 {
return b.withState(batchSyncColumns)
}
if b.blobs != nil && b.blobs.needed() > 0 {
return b.withState(batchSyncBlobs)
}
return b.withState(batchImportable)
}
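For reference, an illustrative summary of the routing implemented by transitionToNext, derived from the function body above rather than stated anywhere in this diff:

// Illustrative routing of transitionToNext:
//
//	len(b.blocks) == 0                      -> batchSequenced   (no blocks yet; request again)
//	len(b.columns.columnsNeeded()) > 0      -> batchSyncColumns (download DataColumnSidecars)
//	b.blobs != nil && b.blobs.needed() > 0  -> batchSyncBlobs   (download BlobSidecars)
//	otherwise                               -> batchImportable  (data availability satisfied)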
@@ -176,27 +188,35 @@ func (b batch) withState(s batchState) batch {
return b
}
func (b batch) withPeer(p peer.ID) batch {
b.blockPid = p
backfillBatchTimeWaiting.Observe(float64(time.Since(b.scheduled).Milliseconds()))
return b
}
func (b batch) withRetryableError(err error) batch {
log.WithFields(b.logFields()).WithError(err).Warn("Could not proceed with batch processing due to error")
b.err = err
return b.withState(batchErrRetryable)
}
func (b batch) blobsNeeded() int {
return b.bs.blobsNeeded()
func (b batch) withFatalError(err error) batch {
log.WithFields(b.logFields()).WithError(err).Error("Fatal batch processing error")
b.err = err
return b.withState(batchErrFatal)
}
func (b batch) blobResponseValidator() sync.BlobResponseValidation {
return b.bs.validateNext
func (b batch) withError(err error) batch {
if isRetryable(err) {
return b.withRetryableError(err)
}
return b.withFatalError(err)
}
func (b batch) availabilityStore() das.AvailabilityStore {
return b.bs.store
func (b batch) validatingColumnRequest(cb *columnBisector) *validatingColumnRequest {
req := b.columns.request(b.nextReqCols)
if req == nil {
return nil
}
return &validatingColumnRequest{
req: req,
columnSync: b.columns,
bisector: cb,
}
}
var batchBlockUntil = func(ctx context.Context, untilRetry time.Duration, b batch) error {
@@ -223,6 +243,18 @@ func (b batch) waitUntilReady(ctx context.Context) error {
return nil
}
func (b batch) workComplete() bool {
return b.state == batchImportable
}
func (b batch) selectPeer(picker *sync.PeerPicker, busy map[peer.ID]bool) (peer.ID, []uint64, error) {
if b.state == batchSyncColumns {
return picker.ForColumns(b.columns.columnsNeeded(), busy)
}
peer, err := picker.ForBlocks(busy)
return peer, nil, err
}
func sortBatchDesc(bb []batch) {
sort.Slice(bb, func(i, j int) bool {
return bb[i].end > bb[j].end

View File

@@ -12,6 +12,7 @@ import (
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/encoding/bytesutil"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
@@ -48,17 +49,24 @@ func newBlobSync(current primitives.Slot, vbs verifiedROBlocks, cfg *blobSyncCon
type blobVerifierMap map[[32]byte][]verification.BlobVerifier
type blobSync struct {
store das.AvailabilityStore
store *das.LazilyPersistentStoreBlob
expected []blobSummary
next int
bbv *blobBatchVerifier
current primitives.Slot
peer peer.ID
}
func (bs *blobSync) blobsNeeded() int {
func (bs *blobSync) needed() int {
return len(bs.expected) - bs.next
}
// validateNext is given to the RPC request code as one of the validation callbacks.
// It orchestrates setting up the batch verifier (blobBatchVerifier) and calls Persist on the
// AvailabilityStore. This enables the rest of the code in between RPC and the AvailabilityStore
// to stay decoupled from each other. The AvailabilityStore holds the blobs in memory between the
// call to Persist, and the call to IsDataAvailable (where the blobs are actually written to disk
// if successfully verified).
func (bs *blobSync) validateNext(rb blocks.ROBlob) error {
if bs.next >= len(bs.expected) {
return errUnexpectedResponseSize
@@ -102,6 +110,7 @@ func newBlobBatchVerifier(nbv verification.NewBlobVerifier) *blobBatchVerifier {
return &blobBatchVerifier{newBlobVerifier: nbv, verifiers: make(blobVerifierMap)}
}
// blobBatchVerifier implements the BlobBatchVerifier interface required by the das store.
type blobBatchVerifier struct {
newBlobVerifier verification.NewBlobVerifier
verifiers blobVerifierMap
@@ -117,6 +126,7 @@ func (bbv *blobBatchVerifier) newVerifier(rb blocks.ROBlob) verification.BlobVer
return m[rb.Index]
}
// VerifiedROBlobs satisfies the BlobBatchVerifier interface expected by the AvailabilityChecker
func (bbv *blobBatchVerifier) VerifiedROBlobs(_ context.Context, blk blocks.ROBlock, _ []blocks.ROBlob) ([]blocks.VerifiedROBlob, error) {
m, ok := bbv.verifiers[blk.Root()]
if !ok {

View File

@@ -0,0 +1,271 @@
package backfill
import (
"bytes"
"context"
"fmt"
"sort"
"time"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
ethpb "github.com/OffchainLabs/prysm/v7/proto/prysm/v1alpha1"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
var (
errInvalidDataColumnResponse = errors.New("invalid DataColumnSidecar response")
errUnexpectedBlockRoot = errors.Wrap(errInvalidDataColumnResponse, "unexpected sidecar block root")
errCommitmentLengthMismatch = errors.Wrap(errInvalidDataColumnResponse, "sidecar has different commitment count than block")
errCommitmentValueMismatch = errors.Wrap(errInvalidDataColumnResponse, "sidecar commitments do not match block")
)
// columnRequestLimit tunes the number of data column sidecars we try to download from a peer at once.
// The spec limit is 128 * 32, but connection errors are more likely when
// requesting so much at once.
const columnRequestLimit = 128 * 4
type columnBatch struct {
first primitives.Slot
last primitives.Slot
custodyGroups peerdas.ColumnIndices
toDownload map[[32]byte]*toDownload
}
type toDownload struct {
remaining peerdas.ColumnIndices
commitments [][]byte
}
func (cs *columnBatch) needed() peerdas.ColumnIndices {
// make a copy that we can modify to reduce search iterations.
search := cs.custodyGroups.ToMap()
ci := peerdas.ColumnIndices{}
for _, v := range cs.toDownload {
if len(search) == 0 {
return ci
}
for col := range search {
if v.remaining.Has(col) {
ci.Set(col)
// avoid iterating every single block+index by only searching for indices
// we haven't found yet.
delete(search, col)
}
}
}
return ci
}
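A worked example of the search-set optimization above (illustrative values, not from this diff):

// Worked example: custodyGroups = {1, 5, 9}, block A remaining = {1}, block B remaining = {5}.
// Scanning block A sets column 1 and deletes it from the search set, so block B is only
// checked for {5, 9}; needed() returns {1, 5} and never reports 9, which no block still needs.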
// neededSidecarCount returns the total number of sidecars still needed to complete the batch.
func (cs *columnBatch) neededSidecarCount() int {
count := 0
for _, v := range cs.toDownload {
count += v.remaining.Count()
}
return count
}
// neededSidecarsByColumn counts how many sidecars are still needed for each column index.
func (cs *columnBatch) neededSidecarsByColumn(peerHas peerdas.ColumnIndices) map[uint64]int {
need := make(map[uint64]int, len(peerHas))
for _, v := range cs.toDownload {
for idx := range v.remaining {
if peerHas.Has(idx) {
need[idx]++
}
}
}
return need
}
type columnSync struct {
*columnBatch
store *das.LazilyPersistentStoreColumn
current primitives.Slot
peer peer.ID
bisector *columnBisector
}
func newColumnSync(ctx context.Context, b batch, blks verifiedROBlocks, current primitives.Slot, p p2p.P2P, vbs verifiedROBlocks, cfg *workerCfg) (*columnSync, error) {
cgc, err := p.CustodyGroupCount(ctx)
if err != nil {
return nil, errors.Wrap(err, "custody group count")
}
cb, err := buildColumnBatch(ctx, b, blks, p, cfg.colStore)
if err != nil {
return nil, err
}
if cb == nil {
return &columnSync{}, nil
}
bisector := newColumnBisector(cfg.downscore)
return &columnSync{
columnBatch: cb,
current: current,
store: das.NewLazilyPersistentStoreColumn(cfg.colStore, cfg.newVC, p.NodeID(), cgc, bisector),
bisector: bisector,
}, nil
}
func (cs *columnSync) blockColumns(root [32]byte) *toDownload {
if cs.columnBatch == nil {
return nil
}
return cs.columnBatch.toDownload[root]
}
func (cs *columnSync) columnsNeeded() peerdas.ColumnIndices {
if cs.columnBatch == nil {
return peerdas.ColumnIndices{}
}
return cs.columnBatch.needed()
}
func (cs *columnSync) request(reqCols []uint64) *ethpb.DataColumnSidecarsByRangeRequest {
// Slice reqCols (b.nextReqCols from the caller) so the expected sidecar count stays within columnRequestLimit.
reqCount := 0
peerHas := peerdas.NewColumnIndicesFromSlice(reqCols)
needed := cs.neededSidecarsByColumn(peerHas)
for i := range reqCols {
addSidecars := needed[reqCols[i]] // number of sidecars this column would add to the response
if reqCount+addSidecars > columnRequestLimit {
reqCols = reqCols[:i]
break
}
reqCount += addSidecars
}
if len(reqCols) == 0 {
return nil
}
return sync.DataColumnSidecarsByRangeRequest(reqCols, cs.first, cs.last)
}
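A quick sizing example for the trimming loop above (illustrative numbers):

// Sizing example: with columnRequestLimit = 128*4 = 512 and a batch where 16 blocks each
// still need every requested column, each column adds 16 sidecars to the response, so
// reqCols is trimmed to the first 512/16 = 32 columns; the remaining columns are
// presumably picked up by a later request once columnsNeeded() shrinks.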
type validatingColumnRequest struct {
req *ethpb.DataColumnSidecarsByRangeRequest
columnSync *columnSync
bisector *columnBisector
}
func (v *validatingColumnRequest) validate(cd blocks.RODataColumn) (err error) {
defer func(validity string, start time.Time) {
dataColumnSidecarVerifyMs.Observe(float64(time.Since(start).Milliseconds()))
if err != nil {
validity = "invalid"
}
dataColumnSidecarDownloadCount.WithLabelValues(fmt.Sprintf("%d", cd.Index), validity).Inc()
dataColumnSidecarDownloadBytes.Add(float64(cd.SizeSSZ()))
}("valid", time.Now())
return v.countedValidation(cd)
}
// When we call Persist we'll get the verification checks that are provided by the availability store.
// In addition to those checks, this function validates each response value in the context of the overall
// request, for example making sure that the block root is one of the ones we expect based on the blocks
// we used to construct the request. It also does cheap sanity checks on the DataColumnSidecar values, like
// ensuring that the commitments line up with the block.
func (v *validatingColumnRequest) countedValidation(cd blocks.RODataColumn) error {
root := cd.BlockRoot()
expected := v.columnSync.blockColumns(root)
if expected == nil {
return errors.Wrapf(errUnexpectedBlockRoot, "root=%#x, slot=%d", root, cd.Slot())
}
// We don't need this column, but we trust that the request state machine already verified it was part of the range we asked for,
// so we can simply skip over it rather than try to persist it.
if !expected.remaining.Has(cd.Index) {
return nil
}
if len(cd.KzgCommitments) != len(expected.commitments) {
return errors.Wrapf(errCommitmentLengthMismatch, "root=%#x, slot=%d, index=%d", root, cd.Slot(), cd.Index)
}
for i, cmt := range cd.KzgCommitments {
if !bytes.Equal(cmt, expected.commitments[i]) {
return errors.Wrapf(errCommitmentValueMismatch, "root=%#x, slot=%d, index=%d", root, cd.Slot(), cd.Index)
}
}
if err := v.columnSync.store.Persist(v.columnSync.current, cd); err != nil {
return errors.Wrap(err, "persisting data column")
}
v.bisector.addPeerColumns(v.columnSync.peer, cd)
expected.remaining.Unset(cd.Index)
return nil
}
func currentCustodiedColumns(ctx context.Context, p p2p.P2P) (peerdas.ColumnIndices, error) {
cgc, err := p.CustodyGroupCount(ctx)
if err != nil {
return nil, errors.Wrap(err, "custody group count")
}
// Note that in the case where custody_group_count is the minimum CUSTODY_REQUIREMENT, we will
// still download the extra columns dictated by SAMPLES_PER_SLOT. This is a hack to avoid complexity in the DA check.
// We may want to revisit this to reduce bandwidth and storage for nodes with 0 validators attached.
peerInfo, _, err := peerdas.Info(p.NodeID(), max(cgc, params.BeaconConfig().SamplesPerSlot))
if err != nil {
return nil, errors.Wrap(err, "peer info")
}
return peerdas.NewColumnIndicesFromMap(peerInfo.CustodyColumns), nil
}
func buildColumnBatch(ctx context.Context, b batch, fuluBlocks verifiedROBlocks, p p2p.P2P, store *filesystem.DataColumnStorage) (*columnBatch, error) {
if len(fuluBlocks) == 0 {
return nil, nil
}
fuluStart := params.BeaconConfig().FuluForkEpoch
// If the batch end slot or the last block in the results is pre-Fulu, then so is every other block in the batch.
if slots.ToEpoch(b.end) < fuluStart || slots.ToEpoch(fuluBlocks[len(fuluBlocks)-1].Block().Slot()) < fuluStart {
return nil, nil
}
// The last block in the batch is in Fulu, but the first one is not.
// Find the index of the first Fulu block so the pre-Fulu blocks can be excluded.
if slots.ToEpoch(fuluBlocks[0].Block().Slot()) < fuluStart {
fuluStart := sort.Search(len(fuluBlocks), func(i int) bool {
return slots.ToEpoch(fuluBlocks[i].Block().Slot()) >= fuluStart
})
fuluBlocks = fuluBlocks[fuluStart:]
}
indices, err := currentCustodiedColumns(ctx, p)
if err != nil {
return nil, errors.Wrap(err, "current custodied columns")
}
summary := &columnBatch{
custodyGroups: indices,
toDownload: make(map[[32]byte]*toDownload, len(fuluBlocks)),
}
for _, b := range fuluBlocks {
cmts, err := b.Block().Body().BlobKzgCommitments()
if err != nil {
return nil, errors.Wrap(err, "failed to get blob kzg commitments")
}
if len(cmts) == 0 {
continue
}
// At this point in the loop we know the block has blob commitments.
// The last block this part of the loop sees will be the last one
// we need to download column sidecars for.
summary.last = b.Block().Slot()
if len(summary.toDownload) == 0 {
// toDownload is only empty the first time through, so this is the first block with blobs.
summary.first = summary.last
}
summary.toDownload[b.Root()] = &toDownload{
remaining: das.IndicesNotStored(store.Summary(b.Root()), indices),
commitments: cmts,
}
}
return summary, nil
}
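An illustrative walk-through of buildColumnBatch, using only behavior visible in this hunk:

// Illustrative: for a batch whose results straddle the Fulu fork, the pre-Fulu blocks are
// sliced off, blocks without blob commitments are skipped, and first/last are the slots of
// the first and last commitment-bearing Fulu blocks. Each remaining set starts as the
// custody columns not already on disk (das.IndicesNotStored against the storage summary
// for that block root).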

View File

@@ -0,0 +1,988 @@
package backfill
import (
"context"
"testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
p2ptest "github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/testing/require"
"github.com/OffchainLabs/prysm/v7/testing/util"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/libp2p/go-libp2p/core/peer"
)
// Helper function to create a columnBatch for testing
func testColumnBatch(custodyGroups peerdas.ColumnIndices, toDownload map[[32]byte]*toDownload) *columnBatch {
return &columnBatch{
custodyGroups: custodyGroups,
toDownload: toDownload,
}
}
// Helper function to create test toDownload entries
func testToDownload(remaining peerdas.ColumnIndices, commitments [][]byte) *toDownload {
return &toDownload{
remaining: remaining,
commitments: commitments,
}
}
// TestColumnBatchNeeded_EmptyBatch tests that needed() returns empty indices when batch has no blocks
func TestColumnBatchNeeded_EmptyBatch(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
toDownload := make(map[[32]byte]*toDownload)
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 0, result.Count(), "needed() should return empty indices for empty batch")
}
// TestColumnBatchNeeded_NoCustodyGroups tests that needed() returns empty indices when there are no custody groups
func TestColumnBatchNeeded_NoCustodyGroups(t *testing.T) {
custodyGroups := peerdas.NewColumnIndices()
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2}), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 0, result.Count(), "needed() should return empty indices when there are no custody groups")
}
// TestColumnBatchNeeded_AllColumnsStored tests that needed() returns empty when all custody columns are already stored
func TestColumnBatchNeeded_AllColumnsStored(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
// All custody columns are already stored (remaining is empty)
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(peerdas.NewColumnIndices(), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 0, result.Count(), "needed() should return empty indices when all custody columns are stored")
}
// TestColumnBatchNeeded_NoColumnsStored tests that needed() returns all custody columns when none are stored
func TestColumnBatchNeeded_NoColumnsStored(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
// All custody columns need to be downloaded
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 3, result.Count(), "needed() should return all custody columns when none are stored")
require.Equal(t, true, result.Has(0), "result should contain column 0")
require.Equal(t, true, result.Has(1), "result should contain column 1")
require.Equal(t, true, result.Has(2), "result should contain column 2")
}
// TestColumnBatchNeeded_PartialDownload tests that needed() returns only the remaining columns
func TestColumnBatchNeeded_PartialDownload(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2, 3})
// Columns 0 and 2 are already stored, 1 and 3 still need downloading
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{1, 3})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 2, result.Count(), "needed() should return only remaining columns")
require.Equal(t, false, result.Has(0), "result should not contain column 0 (already stored)")
require.Equal(t, true, result.Has(1), "result should contain column 1")
require.Equal(t, false, result.Has(2), "result should not contain column 2 (already stored)")
require.Equal(t, true, result.Has(3), "result should contain column 3")
}
// TestColumnBatchNeeded_NoCommitments tests handling of blocks without blob commitments
func TestColumnBatchNeeded_NoCommitments(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
// Empty toDownload map (no blocks with commitments)
toDownload := make(map[[32]byte]*toDownload)
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 0, result.Count(), "needed() should return empty indices when no blocks have commitments")
}
// TestColumnBatchNeeded_SingleBlock tests needed() with a single block
func TestColumnBatchNeeded_SingleBlock(t *testing.T) {
cases := []struct {
name string
custodyGroups []uint64
remaining []uint64
expectedCount int
expectedCols []uint64
}{
{
name: "single block, all columns needed",
custodyGroups: []uint64{0, 1, 2},
remaining: []uint64{0, 1, 2},
expectedCount: 3,
expectedCols: []uint64{0, 1, 2},
},
{
name: "single block, partial columns needed",
custodyGroups: []uint64{0, 1, 2, 3},
remaining: []uint64{1, 3},
expectedCount: 2,
expectedCols: []uint64{1, 3},
},
{
name: "single block, no columns needed",
custodyGroups: []uint64{0, 1, 2},
remaining: []uint64{},
expectedCount: 0,
expectedCols: []uint64{},
},
{
name: "single block, remaining has non-custody columns",
custodyGroups: []uint64{0, 1},
remaining: []uint64{0, 5, 10}, // 5 and 10 are not custody columns
expectedCount: 1,
expectedCols: []uint64{0}, // Only custody column 0 is needed
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice(c.custodyGroups)
remaining := peerdas.NewColumnIndicesFromSlice(c.remaining)
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, c.expectedCount, result.Count(), "unexpected count of needed columns")
for _, col := range c.expectedCols {
require.Equal(t, true, result.Has(col), "result should contain column %d", col)
}
})
}
}
// TestColumnBatchNeeded_MultipleBlocks_SameNeeds tests multiple blocks all needing the same columns
func TestColumnBatchNeeded_MultipleBlocks_SameNeeds(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
// All three blocks need the same columns
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining.Copy(), nil),
[32]byte{0x02}: testToDownload(remaining.Copy(), nil),
[32]byte{0x03}: testToDownload(remaining.Copy(), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 3, result.Count(), "needed() should return all custody columns")
require.Equal(t, true, result.Has(0), "result should contain column 0")
require.Equal(t, true, result.Has(1), "result should contain column 1")
require.Equal(t, true, result.Has(2), "result should contain column 2")
}
// TestColumnBatchNeeded_MultipleBlocks_DifferentNeeds tests multiple blocks needing different columns
func TestColumnBatchNeeded_MultipleBlocks_DifferentNeeds(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2, 3, 4})
// Block 1 needs columns 0, 1
// Block 2 needs columns 2, 3
// Block 3 needs columns 4
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{0, 1}), nil),
[32]byte{0x02}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{2, 3}), nil),
[32]byte{0x03}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{4}), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 5, result.Count(), "needed() should return union of all needed columns")
require.Equal(t, true, result.Has(0), "result should contain column 0")
require.Equal(t, true, result.Has(1), "result should contain column 1")
require.Equal(t, true, result.Has(2), "result should contain column 2")
require.Equal(t, true, result.Has(3), "result should contain column 3")
require.Equal(t, true, result.Has(4), "result should contain column 4")
}
// TestColumnBatchNeeded_MixedBlockStates tests blocks in different download states
func TestColumnBatchNeeded_MixedBlockStates(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2, 3})
// Block 1: all columns complete (empty remaining)
// Block 2: partially complete (columns 1, 3 remaining)
// Block 3: nothing downloaded yet (all custody columns remaining)
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(peerdas.NewColumnIndices(), nil),
[32]byte{0x02}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{1, 3}), nil),
[32]byte{0x03}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2, 3}), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
// Should return all custody columns that appear in at least one block's remaining set
require.Equal(t, 4, result.Count(), "needed() should return all columns that are needed by at least one block")
require.Equal(t, true, result.Has(0), "result should contain column 0")
require.Equal(t, true, result.Has(1), "result should contain column 1")
require.Equal(t, true, result.Has(2), "result should contain column 2")
require.Equal(t, true, result.Has(3), "result should contain column 3")
}
// TestColumnBatchNeeded_EarlyExitOptimization tests the early exit optimization when all custody columns are found
func TestColumnBatchNeeded_EarlyExitOptimization(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1})
// Block 1 has both custody columns in remaining
// Block 2 also has columns in remaining, but they shouldn't affect the result
// The algorithm should exit early after finding all custody columns in block 1
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{0, 1}), nil),
[32]byte{0x02}: testToDownload(peerdas.NewColumnIndicesFromSlice([]uint64{0, 1}), nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
// Should find both custody columns
require.Equal(t, 2, result.Count(), "needed() should find all custody columns")
require.Equal(t, true, result.Has(0), "result should contain column 0")
require.Equal(t, true, result.Has(1), "result should contain column 1")
}
// TestColumnBatchNeeded_AfterUnset tests that needed() updates correctly after Unset() is called
func TestColumnBatchNeeded_AfterUnset(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
// Initial state: all columns needed
result := cb.needed()
require.Equal(t, 3, result.Count(), "initially, all custody columns should be needed")
// Simulate downloading column 1
remaining.Unset(1)
// After Unset: column 1 should no longer be needed
result = cb.needed()
require.Equal(t, 2, result.Count(), "after Unset(1), only 2 columns should be needed")
require.Equal(t, true, result.Has(0), "result should still contain column 0")
require.Equal(t, false, result.Has(1), "result should not contain column 1 after Unset")
require.Equal(t, true, result.Has(2), "result should still contain column 2")
// Simulate downloading all remaining columns
remaining.Unset(0)
remaining.Unset(2)
// After all Unsets: no columns needed
result = cb.needed()
require.Equal(t, 0, result.Count(), "after all columns downloaded, none should be needed")
}
// TestColumnBatchNeeded_MultipleBlocks_AfterPartialUnset tests partial completion across multiple blocks
func TestColumnBatchNeeded_MultipleBlocks_AfterPartialUnset(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
remaining1 := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
remaining2 := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining1, nil),
[32]byte{0x02}: testToDownload(remaining2, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
// Initial state: all columns needed from both blocks
result := cb.needed()
require.Equal(t, 3, result.Count(), "initially, all custody columns should be needed")
// Download column 0 from block 1 only
remaining1.Unset(0)
// Column 0 is still needed because block 2 still needs it
result = cb.needed()
require.Equal(t, 3, result.Count(), "column 0 still needed by block 2")
require.Equal(t, true, result.Has(0), "column 0 still in needed set")
// Download column 0 from block 2 as well
remaining2.Unset(0)
// Now column 0 is no longer needed by any block
result = cb.needed()
require.Equal(t, 2, result.Count(), "column 0 no longer needed by any block")
require.Equal(t, false, result.Has(0), "column 0 should not be in needed set")
require.Equal(t, true, result.Has(1), "column 1 still needed")
require.Equal(t, true, result.Has(2), "column 2 still needed")
}
// TestColumnBatchNeeded_LargeColumnIndices tests with realistic column indices for PeerDAS
func TestColumnBatchNeeded_LargeColumnIndices(t *testing.T) {
// Simulate a realistic scenario with larger column indices
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{5, 16, 27, 38, 49, 60, 71, 82, 93, 104, 115, 126})
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{5, 16, 27, 38, 49, 60, 71, 82, 93, 104, 115, 126})
toDownload := map[[32]byte]*toDownload{
[32]byte{0x01}: testToDownload(remaining, nil),
}
cb := testColumnBatch(custodyGroups, toDownload)
result := cb.needed()
require.Equal(t, 12, result.Count(), "should handle larger column indices correctly")
require.Equal(t, true, result.Has(5), "result should contain column 5")
require.Equal(t, true, result.Has(126), "result should contain column 126")
}
// TestBuildColumnBatch tests the buildColumnBatch function
func TestBuildColumnBatch(t *testing.T) {
params.SetupTestConfigCleanup(t)
// Setup Fulu fork epoch if not already set
denebEpoch := params.BeaconConfig().DenebForkEpoch
if params.BeaconConfig().FuluForkEpoch == params.BeaconConfig().FarFutureEpoch {
params.BeaconConfig().FuluForkEpoch = denebEpoch + 4096*2
}
fuluEpoch := params.BeaconConfig().FuluForkEpoch
fuluSlot, err := slots.EpochStart(fuluEpoch)
require.NoError(t, err)
denebSlot, err := slots.EpochStart(denebEpoch)
require.NoError(t, err)
t.Run("empty blocks returns nil", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
cb, err := buildColumnBatch(ctx, batch{}, verifiedROBlocks{}, p, store)
require.NoError(t, err)
require.Equal(t, true, cb == nil)
})
t.Run("pre-Fulu batch end returns nil", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks in Deneb
blks, _ := testBlobGen(t, denebSlot, 2)
b := batch{
begin: denebSlot,
end: denebSlot + 10,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.Equal(t, true, cb == nil)
})
t.Run("pre-Fulu last block returns nil", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks before Fulu but batch end after
blks, _ := testBlobGen(t, denebSlot, 2)
b := batch{
begin: denebSlot,
end: fuluSlot + 10,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.Equal(t, true, cb == nil)
})
t.Run("boundary: batch end exactly at Fulu epoch", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks at Fulu start
blks, _ := testBlobGen(t, fuluSlot, 2)
b := batch{
begin: fuluSlot,
end: fuluSlot,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.NotNil(t, cb, "batch at Fulu boundary should not be nil")
})
t.Run("boundary: last block exactly at Fulu epoch", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks at Fulu start
blks, _ := testBlobGen(t, fuluSlot, 1)
b := batch{
begin: fuluSlot,
end: fuluSlot + 100,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.NotNil(t, cb, "last block at Fulu boundary should not be nil")
})
t.Run("mixed epochs: first block pre-Fulu, last block post-Fulu", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks spanning the fork: 2 before, 2 after
preFuluCount := 2
postFuluCount := 2
startSlot := fuluSlot - primitives.Slot(preFuluCount)
allBlocks := make([]blocks.ROBlock, 0, preFuluCount+postFuluCount)
preBlocks, _ := testBlobGen(t, startSlot, preFuluCount)
postBlocks, _ := testBlobGen(t, fuluSlot, postFuluCount)
allBlocks = append(allBlocks, preBlocks...)
allBlocks = append(allBlocks, postBlocks...)
b := batch{
begin: startSlot,
end: fuluSlot + primitives.Slot(postFuluCount),
}
cb, err := buildColumnBatch(ctx, b, allBlocks, p, store)
require.NoError(t, err)
require.NotNil(t, cb, "mixed epoch batch should not be nil")
// Should only include Fulu blocks
require.Equal(t, postFuluCount, len(cb.toDownload), "should only include Fulu blocks")
})
t.Run("boundary: first block exactly at Fulu epoch", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks starting exactly at Fulu
blks, _ := testBlobGen(t, fuluSlot, 3)
b := batch{
begin: fuluSlot,
end: fuluSlot + 100,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.NotNil(t, cb, "first block at Fulu should not be nil")
require.Equal(t, 3, len(cb.toDownload), "should include all 3 blocks")
})
t.Run("single Fulu block with commitments", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
blks, _ := testBlobGen(t, fuluSlot, 1)
b := batch{
begin: fuluSlot,
end: fuluSlot + 10,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.NotNil(t, cb)
require.Equal(t, fuluSlot, cb.first, "first slot should be set")
require.Equal(t, fuluSlot, cb.last, "last slot should equal first for single block")
require.Equal(t, 1, len(cb.toDownload))
})
t.Run("multiple blocks: first and last assignment", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
blks, _ := testBlobGen(t, fuluSlot, 5)
b := batch{
begin: fuluSlot,
end: fuluSlot + 10,
}
cb, err := buildColumnBatch(ctx, b, blks, p, store)
require.NoError(t, err)
require.NotNil(t, cb)
require.Equal(t, fuluSlot, cb.first, "first should be slot of first block with commitments")
require.Equal(t, fuluSlot+4, cb.last, "last should be slot of last block with commitments")
})
t.Run("blocks without commitments are skipped", func(t *testing.T) {
ctx := context.Background()
p := p2ptest.NewTestP2P(t)
store := filesystem.NewEphemeralDataColumnStorage(t)
// Create blocks with commitments
blksWithCmts, _ := testBlobGen(t, fuluSlot, 2)
// Create a block without commitments (manually)
blkNoCmt, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, fuluSlot+2, 0)
// Mix them together
allBlocks := []blocks.ROBlock{
blksWithCmts[0],
blkNoCmt, // no commitments - should be skipped via continue
blksWithCmts[1],
}
b := batch{
begin: fuluSlot,
end: fuluSlot + 10,
}
cb, err := buildColumnBatch(ctx, b, allBlocks, p, store)
require.NoError(t, err)
require.NotNil(t, cb)
// Should only have 2 blocks (those with commitments)
require.Equal(t, 2, len(cb.toDownload), "should skip blocks without commitments")
})
}
// TestColumnSync_BlockColumns tests the blockColumns method
func TestColumnSync_BlockColumns(t *testing.T) {
t.Run("nil columnBatch returns nil", func(t *testing.T) {
cs := &columnSync{
columnBatch: nil,
}
result := cs.blockColumns([32]byte{0x01})
require.Equal(t, true, result == nil)
})
t.Run("existing block root returns toDownload", func(t *testing.T) {
root := [32]byte{0x01}
expected := &toDownload{
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{1, 2, 3}),
commitments: [][]byte{{0xaa}, {0xbb}},
}
cs := &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
root: expected,
},
},
}
result := cs.blockColumns(root)
require.Equal(t, expected, result)
})
t.Run("non-existing block root returns nil", func(t *testing.T) {
cs := &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
[32]byte{0x01}: {
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{1}),
},
},
},
}
result := cs.blockColumns([32]byte{0x99})
require.Equal(t, true, result == nil)
})
}
// TestColumnSync_ColumnsNeeded tests the columnsNeeded method
func TestColumnSync_ColumnsNeeded(t *testing.T) {
t.Run("nil columnBatch returns empty indices", func(t *testing.T) {
cs := &columnSync{
columnBatch: nil,
}
result := cs.columnsNeeded()
require.Equal(t, 0, result.Count())
})
t.Run("delegates to needed() when columnBatch exists", func(t *testing.T) {
custodyGroups := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2})
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{1, 2})
cs := &columnSync{
columnBatch: &columnBatch{
custodyGroups: custodyGroups,
toDownload: map[[32]byte]*toDownload{
[32]byte{0x01}: {
remaining: remaining,
},
},
},
}
result := cs.columnsNeeded()
require.Equal(t, 2, result.Count())
require.Equal(t, true, result.Has(1))
require.Equal(t, true, result.Has(2))
})
}
// TestValidatingColumnRequest_CountedValidation tests the countedValidation method
func TestValidatingColumnRequest_CountedValidation(t *testing.T) {
mockPeer := peer.ID("test-peer")
t.Run("unexpected block root returns error", func(t *testing.T) {
// Create a data column with a specific block root
params := []util.DataColumnParam{
{
Index: 0,
Slot: 100,
ProposerIndex: 1,
KzgCommitments: [][]byte{{0xaa}},
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
// Different root from what the column has
[32]byte{0x99}: {
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{0}),
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
err := vcr.countedValidation(roCols[0])
require.ErrorIs(t, err, errUnexpectedBlockRoot)
})
t.Run("column not in remaining set returns nil (skipped)", func(t *testing.T) {
blockRoot := [32]byte{0x01}
params := []util.DataColumnParam{
{
Index: 5, // Not in remaining set
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{{0xaa}},
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2}) // 5 not included
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: remaining,
commitments: [][]byte{{0xaa}},
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
err := vcr.countedValidation(roCols[0])
require.NoError(t, err, "should return nil when column not needed")
// Verify remaining was not modified
require.Equal(t, 3, remaining.Count())
})
t.Run("commitment length mismatch returns error", func(t *testing.T) {
blockRoot := [32]byte{0x01}
params := []util.DataColumnParam{
{
Index: 0,
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{{0xaa}, {0xbb}}, // 2 commitments
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{0}),
commitments: [][]byte{{0xaa}}, // Only 1 commitment - mismatch!
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
err := vcr.countedValidation(roCols[0])
require.ErrorIs(t, err, errCommitmentLengthMismatch)
})
t.Run("commitment value mismatch returns error", func(t *testing.T) {
blockRoot := [32]byte{0x01}
params := []util.DataColumnParam{
{
Index: 0,
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{{0xaa}, {0xbb}},
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{0}),
// Different commitment values
commitments: [][]byte{{0xaa}, {0xcc}},
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
err := vcr.countedValidation(roCols[0])
require.ErrorIs(t, err, errCommitmentValueMismatch)
})
t.Run("successful validation updates state correctly", func(t *testing.T) {
currentSlot := primitives.Slot(200)
// Create a valid data column
blockRoot := [32]byte{0x01}
commitment := make([]byte, 48) // KZG commitments are 48 bytes
commitment[0] = 0xaa
params := []util.DataColumnParam{
{
Index: 0,
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{commitment},
},
}
roCols, verifiedCols := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
// Mock storage and verifier
colStore := filesystem.NewEphemeralDataColumnStorage(t)
p2p := p2ptest.NewTestP2P(t)
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0})
bisector := newColumnBisector(func(peer.ID, string, error) {})
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: remaining,
commitments: [][]byte{{0xaa}},
},
},
},
store: das.NewLazilyPersistentStoreColumn(colStore, testNewDataColumnsVerifier(), p2p.NodeID(), 1, bisector),
current: currentSlot,
peer: mockPeer,
},
bisector: bisector,
}
// Add peer columns tracking
vcr.bisector.addPeerColumns(mockPeer, roCols[0])
// First save the verified column so Persist can work
err := colStore.Save([]blocks.VerifiedRODataColumn{verifiedCols[0]})
require.NoError(t, err)
// Update the columnBatch toDownload to use the correct commitment size
vcr.columnSync.columnBatch.toDownload[roCols[0].BlockRoot()].commitments = [][]byte{commitment}
// Now test validation - it should mark the column as downloaded
require.Equal(t, true, remaining.Has(0), "column 0 should be in remaining before validation")
err = vcr.countedValidation(roCols[0])
require.NoError(t, err)
// Verify that remaining.Unset was called (column 0 should be removed)
require.Equal(t, false, remaining.Has(0), "column 0 should be removed from remaining after validation")
require.Equal(t, 0, remaining.Count(), "remaining should be empty")
})
}
// TestNewColumnSync tests the newColumnSync function
func TestNewColumnSync(t *testing.T) {
params.SetupTestConfigCleanup(t)
denebEpoch := params.BeaconConfig().DenebForkEpoch
if params.BeaconConfig().FuluForkEpoch == params.BeaconConfig().FarFutureEpoch {
params.BeaconConfig().FuluForkEpoch = denebEpoch + 4096*2
}
fuluEpoch := params.BeaconConfig().FuluForkEpoch
fuluSlot, err := slots.EpochStart(fuluEpoch)
require.NoError(t, err)
t.Run("returns nil columnBatch when buildColumnBatch returns nil", func(t *testing.T) {
ctx := context.Background()
p2p := p2ptest.NewTestP2P(t)
colStore := filesystem.NewEphemeralDataColumnStorage(t)
current := primitives.Slot(100)
cfg := &workerCfg{
colStore: colStore,
downscore: func(peer.ID, string, error) {},
}
// Empty blocks should result in nil columnBatch
cs, err := newColumnSync(ctx, batch{}, verifiedROBlocks{}, current, p2p, verifiedROBlocks{}, cfg)
require.NoError(t, err)
require.NotNil(t, cs, "columnSync should not be nil")
require.Equal(t, true, cs.columnBatch == nil, "columnBatch should be nil for empty blocks")
})
t.Run("successful initialization with Fulu blocks", func(t *testing.T) {
ctx := context.Background()
p2p := p2ptest.NewTestP2P(t)
colStore := filesystem.NewEphemeralDataColumnStorage(t)
current := fuluSlot + 100
blks, _ := testBlobGen(t, fuluSlot, 2)
b := batch{
begin: fuluSlot,
end: fuluSlot + 10,
blocks: blks,
}
cfg := &workerCfg{
colStore: colStore,
downscore: func(peer.ID, string, error) {},
}
cs, err := newColumnSync(ctx, b, blks, current, p2p, verifiedROBlocks{}, cfg)
require.NoError(t, err)
require.NotNil(t, cs)
require.NotNil(t, cs.columnBatch, "columnBatch should be initialized")
require.NotNil(t, cs.store, "store should be initialized")
require.NotNil(t, cs.bisector, "bisector should be initialized")
require.Equal(t, current, cs.current)
})
}
// TestCurrentCustodiedColumns tests the currentCustodiedColumns function
func TestCurrentCustodiedColumns(t *testing.T) {
t.Run("successful column indices retrieval", func(t *testing.T) {
ctx := context.Background()
p2p := p2ptest.NewTestP2P(t)
indices, err := currentCustodiedColumns(ctx, p2p)
require.NoError(t, err)
require.NotNil(t, indices)
// Should have some custody columns based on default settings
require.Equal(t, true, indices.Count() > 0, "should have at least some custody columns")
})
}
// TestValidatingColumnRequest_Validate tests the validate method
func TestValidatingColumnRequest_Validate(t *testing.T) {
mockPeer := peer.ID("test-peer")
t.Run("validate wraps countedValidation and records metrics", func(t *testing.T) {
// Create a valid data column that won't be in the remaining set (so it skips Persist)
blockRoot := [32]byte{0x01}
commitment := make([]byte, 48)
commitment[0] = 0xaa
params := []util.DataColumnParam{
{
Index: 5, // Not in remaining set, so will skip Persist
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{commitment},
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
remaining := peerdas.NewColumnIndicesFromSlice([]uint64{0, 1, 2}) // Column 5 not here
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: remaining,
commitments: [][]byte{commitment},
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
// Call validate (which wraps countedValidation)
err := vcr.validate(roCols[0])
// Should succeed - column not in remaining set, so it returns early
require.NoError(t, err)
})
t.Run("validate returns error from countedValidation", func(t *testing.T) {
// Create a data column with mismatched commitments
blockRoot := [32]byte{0x01}
colParams := []util.DataColumnParam{
{
Index: 0,
Slot: 100,
ProposerIndex: 1,
ParentRoot: blockRoot[:],
KzgCommitments: [][]byte{{0xaa}, {0xbb}},
},
}
roCols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, colParams)
vcr := &validatingColumnRequest{
columnSync: &columnSync{
columnBatch: &columnBatch{
toDownload: map[[32]byte]*toDownload{
roCols[0].BlockRoot(): {
remaining: peerdas.NewColumnIndicesFromSlice([]uint64{0}),
commitments: [][]byte{{0xaa}}, // Length mismatch
},
},
},
peer: mockPeer,
},
bisector: newColumnBisector(func(peer.ID, string, error) {}),
}
// Call validate
err := vcr.validate(roCols[0])
// Should return the error from countedValidation
require.ErrorIs(t, err, errCommitmentLengthMismatch)
})
}
// Helper to create a test column verifier
func testNewDataColumnsVerifier() verification.NewDataColumnsVerifier {
return func([]blocks.RODataColumn, []verification.Requirement) verification.DataColumnsVerifier {
return &verification.MockDataColumnsVerifier{}
}
}

View File

@@ -0,0 +1,9 @@
package backfill
import "github.com/pkg/errors"
var errUnrecoverable = errors.New("service in unrecoverable state")
func isRetryable(err error) bool {
return !errors.Is(err, errUnrecoverable)
}
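// For example (an illustrative sketch of the intended contract): any error wrapped
// around errUnrecoverable is treated as fatal, while other errors remain retryable.
//
//    isRetryable(errors.Wrap(errUnrecoverable, "bad config")) // false
//    isRetryable(errors.New("request timed out"))             // true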

View File

@@ -0,0 +1,148 @@
package backfill
import (
"context"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/pkg/errors"
)
var errMissingAvailabilityChecker = errors.Wrap(errUnrecoverable, "batch is missing required availability checker")
var errUnsafeRange = errors.Wrap(errUnrecoverable, "invalid slice indices")
type checkMultiplexer struct {
blobCheck das.AvailabilityChecker
colCheck das.AvailabilityChecker
denebStart primitives.Slot
fuluStart primitives.Slot
}
// Compile-time assertion that checkMultiplexer implements das.AvailabilityChecker.
var _ das.AvailabilityChecker = &checkMultiplexer{}
// newCheckMultiplexer initializes an AvailabilityChecker that multiplexes to the BlobSidecar and DataColumnSidecar
// AvailabilityCheckers present in the batch.
func newCheckMultiplexer(fuluStart, denebStart primitives.Slot, b batch) *checkMultiplexer {
s := &checkMultiplexer{fuluStart: fuluStart, denebStart: denebStart}
if b.blobs != nil && b.blobs.store != nil {
s.blobCheck = b.blobs.store
}
if b.columns != nil && b.columns.store != nil {
s.colCheck = b.columns.store
}
return s
}
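// A rough usage sketch, mirroring how defaultBatchImporter wires this up per batch:
//
//    mux := newCheckMultiplexer(fuluStart, denebStart, b)
//    if err := mux.IsDataAvailable(ctx, current, b.blocks...); err != nil {
//        // errors wrapping errUnrecoverable (e.g. a missing checker) are fatal;
//        // anything else may be retried.
//    }
//
// Pre-deneb blocks and blocks without commitments are skipped; deneb-range blocks are
// routed to the blob checker and fulu-range blocks to the column checker.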
// IsDataAvailable implements the das.AvailabilityChecker interface.
func (m *checkMultiplexer) IsDataAvailable(ctx context.Context, current primitives.Slot, blks ...blocks.ROBlock) error {
needs, err := m.blockDaNeeds(blks)
if err != nil {
return errors.Wrap(errUnrecoverable, "failed to slice blocks by DA type")
}
if err := doAvailabilityCheck(ctx, m.blobCheck, current, needs.blobs); err != nil {
return errors.Wrap(err, "blob store availability check failed")
}
if err := doAvailabilityCheck(ctx, m.colCheck, current, needs.cols); err != nil {
return errors.Wrap(err, "column store availability check failed")
}
return nil
}
func doAvailabilityCheck(ctx context.Context, check das.AvailabilityChecker, current primitives.Slot, blks []blocks.ROBlock) error {
if len(blks) == 0 {
return nil
}
// Double check that the checker is non-nil.
if check == nil {
return errMissingAvailabilityChecker
}
return check.IsDataAvailable(ctx, current, blks...)
}
// daNeeds is a helper type that groups blocks by their DA type.
type daNeeds struct {
blobs []blocks.ROBlock
cols []blocks.ROBlock
}
// blockDaNeeds slices the given blocks into two groups: deneb-range blocks (BlobSidecar)
// and fulu-range blocks (DataColumnSidecar). Blocks that are pre-deneb or have no
// blob commitments are skipped.
func (m *checkMultiplexer) blockDaNeeds(blks []blocks.ROBlock) (daNeeds, error) {
needs := daNeeds{}
blobs, cols := safeRange{}, safeRange{}
for i, blk := range blks {
ui := uint(i)
slot := blk.Block().Slot()
// Skip blocks that are pre-deneb or with no commitments.
if slot < m.denebStart {
continue
}
cmts, err := blk.Block().Body().BlobKzgCommitments()
if err != nil {
return needs, err
}
if len(cmts) == 0 {
continue
}
if slot >= m.fuluStart {
if cols.isZero() {
cols.start = ui
}
cols.end = ui + 1
continue
}
// slot is >= deneb and < fulu.
if blobs.isZero() {
blobs.start = ui
}
blobs.end = ui + 1
}
var err error
needs.blobs, err = subSlice(blks, blobs)
if err != nil {
return needs, errors.Wrap(err, "slicing deneb blocks")
}
needs.cols, err = subSlice(blks, cols)
if err != nil {
return needs, errors.Wrap(err, "slicing fulu blocks")
}
return needs, nil
}
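// Because blks is sorted by slot, each fork's blocks occupy one contiguous index range,
// so a single safeRange per fork is sufficient. For example, with four blocks spanning
// the fork boundary, blks[0:2] would land in needs.blobs and blks[2:4] in needs.cols.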
// safeRange is a helper type that enforces safe slicing.
type safeRange struct {
start uint
end uint
}
// isZero returns true if the range is zero-length.
func (r safeRange) isZero() bool {
return r.start == r.end
}
// subSlice returns the subslice of s defined by sub
// if it can be safely sliced, or an error if the range is invalid
// with respect to the slice.
func subSlice[T any](s []T, sub safeRange) ([]T, error) {
slen := uint(len(s))
if slen == 0 || sub.isZero() {
return nil, nil
}
// Check that the range is not inverted.
if sub.end < sub.start {
return nil, errUnsafeRange
}
// Check that both bounds are within the slice.
if sub.start >= slen || sub.end > slen {
return nil, errUnsafeRange
}
return s[sub.start:sub.end], nil
}
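// For example: subSlice([]int{0, 1, 2}, safeRange{start: 1, end: 3}) yields []int{1, 2},
// a zero-valued safeRange yields nil, and an inverted or out-of-bounds range on a
// non-empty slice yields errUnsafeRange, which wraps errUnrecoverable and is therefore
// treated as fatal by isRetryable.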

View File

@@ -0,0 +1,362 @@
package backfill
import (
"testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/testing/require"
"github.com/OffchainLabs/prysm/v7/testing/util"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/pkg/errors"
)
type mockChecker struct {
}
var mockAvailabilityFailure = errors.New("fake error from IsDataAvailable")
var mockColumnFailure = errors.Wrap(mockAvailabilityFailure, "column checker failure")
var mockBlobFailure = errors.Wrap(mockAvailabilityFailure, "blob checker failure")
func TestNewCheckMultiplexer(t *testing.T) {
denebSlot, fuluSlot := testDenebAndFuluSlots(t)
cases := []struct {
name string
batch func() batch
setupChecker func(*checkMultiplexer)
current primitives.Slot
err error
}{
{
name: "no availability checkers, no blocks",
batch: func() batch { return batch{} },
},
{
name: "no blob availability checkers, deneb blocks",
batch: func() batch {
blks, _ := testBlobGen(t, denebSlot, 2)
return batch{
blocks: blks,
}
},
setupChecker: func(m *checkMultiplexer) {
// Provide a column checker which should be unused in this test.
m.colCheck = &das.MockAvailabilityStore{}
},
err: errMissingAvailabilityChecker,
},
{
name: "no column availability checker, fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot, 2)
return batch{
blocks: blks,
}
},
err: errMissingAvailabilityChecker,
setupChecker: func(m *checkMultiplexer) {
// Provide a blob checker which should be unused in this test.
m.blobCheck = &das.MockAvailabilityStore{}
},
},
{
name: "has column availability checker, fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot, 2)
return batch{
blocks: blks,
}
},
setupChecker: func(m *checkMultiplexer) {
// Provide the column checker required for fulu blocks.
m.colCheck = &das.MockAvailabilityStore{}
},
},
{
name: "has blob availability checker, deneb blocks",
batch: func() batch {
blks, _ := testBlobGen(t, denebSlot, 2)
return batch{
blocks: blks,
}
},
setupChecker: func(m *checkMultiplexer) {
// Provide the blob checker required for deneb blocks.
m.blobCheck = &das.MockAvailabilityStore{}
},
},
{
name: "has blob but not col availability checker, deneb and fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot-2, 4) // spans deneb and fulu
return batch{
blocks: blks,
}
},
err: errMissingAvailabilityChecker, // fails because column store is not present
setupChecker: func(m *checkMultiplexer) {
m.blobCheck = &das.MockAvailabilityStore{}
},
},
{
name: "has col but not blob availability checker, deneb and fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot-2, 4) // spans deneb and fulu
return batch{
blocks: blks,
}
},
err: errMissingAvailabilityChecker, // fails because blob store is not present
setupChecker: func(m *checkMultiplexer) {
m.colCheck = &das.MockAvailabilityStore{}
},
},
{
name: "both checkers, deneb and fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot-2, 4) // spans deneb and fulu
return batch{
blocks: blks,
}
},
setupChecker: func(m *checkMultiplexer) {
m.blobCheck = &das.MockAvailabilityStore{}
m.colCheck = &das.MockAvailabilityStore{}
},
},
{
name: "deneb checker fails, deneb and fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot-2, 4) // spans deneb and fulu
return batch{
blocks: blks,
}
},
err: mockBlobFailure,
setupChecker: func(m *checkMultiplexer) {
m.blobCheck = &das.MockAvailabilityStore{ErrIsDataAvailable: mockBlobFailure}
m.colCheck = &das.MockAvailabilityStore{}
},
},
{
name: "fulu checker fails, deneb and fulu blocks",
batch: func() batch {
blks, _ := testBlobGen(t, fuluSlot-2, 4) // spans deneb and fulu
return batch{
blocks: blks,
}
},
err: mockColumnFailure,
setupChecker: func(m *checkMultiplexer) {
m.blobCheck = &das.MockAvailabilityStore{}
m.colCheck = &das.MockAvailabilityStore{ErrIsDataAvailable: mockColumnFailure}
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
b := tc.batch()
checker := newCheckMultiplexer(fuluSlot, denebSlot, b)
if tc.setupChecker != nil {
tc.setupChecker(checker)
}
err := checker.IsDataAvailable(t.Context(), tc.current, b.blocks...)
if tc.err != nil {
require.ErrorIs(t, err, tc.err)
} else {
require.NoError(t, err)
}
})
}
}
func testBlocksWithCommitments(t *testing.T, startSlot primitives.Slot, count int) []blocks.ROBlock {
blks := make([]blocks.ROBlock, count)
for i := 0; i < count; i++ {
blk, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, startSlot+primitives.Slot(i), 1)
blks[i] = blk
}
return blks
}
func TestDaNeeds(t *testing.T) {
denebSlot, fuluSlot := testDenebAndFuluSlots(t)
mux := &checkMultiplexer{
denebStart: denebSlot,
fuluStart: fuluSlot,
}
cases := []struct {
name string
setup func() (daNeeds, []blocks.ROBlock)
expect daNeeds
err error
}{
{
name: "empty range",
setup: func() (daNeeds, []blocks.ROBlock) {
return daNeeds{}, testBlocksWithCommitments(t, 10, 5)
},
},
{
name: "single deneb block",
setup: func() (daNeeds, []blocks.ROBlock) {
blks := testBlocksWithCommitments(t, denebSlot, 1)
return daNeeds{
blobs: []blocks.ROBlock{blks[0]},
}, blks
},
},
{
name: "single fulu block",
setup: func() (daNeeds, []blocks.ROBlock) {
blks := testBlocksWithCommitments(t, fuluSlot, 1)
return daNeeds{
cols: []blocks.ROBlock{blks[0]},
}, blks
},
},
{
name: "deneb range",
setup: func() (daNeeds, []blocks.ROBlock) {
blks := testBlocksWithCommitments(t, denebSlot, 3)
return daNeeds{
blobs: blks,
}, blks
},
},
{
name: "one deneb one fulu",
setup: func() (daNeeds, []blocks.ROBlock) {
deneb := testBlocksWithCommitments(t, denebSlot, 1)
fulu := testBlocksWithCommitments(t, fuluSlot, 1)
return daNeeds{
blobs: []blocks.ROBlock{deneb[0]},
cols: []blocks.ROBlock{fulu[0]},
}, append(deneb, fulu...)
},
},
{
name: "deneb and fulu range",
setup: func() (daNeeds, []blocks.ROBlock) {
deneb := testBlocksWithCommitments(t, denebSlot, 3)
fulu := testBlocksWithCommitments(t, fuluSlot, 3)
return daNeeds{
blobs: deneb,
cols: fulu,
}, append(deneb, fulu...)
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
expectNeeds, blks := tc.setup()
needs, err := mux.blockDaNeeds(blks)
if tc.err != nil {
require.ErrorIs(t, err, tc.err)
} else {
require.NoError(t, err)
}
expectBlob := make(map[[32]byte]struct{})
for _, blk := range expectNeeds.blobs {
expectBlob[blk.Root()] = struct{}{}
}
for _, blk := range needs.blobs {
_, ok := expectBlob[blk.Root()]
require.Equal(t, true, ok, "unexpected blob block root %#x", blk.Root())
delete(expectBlob, blk.Root())
}
require.Equal(t, 0, len(expectBlob), "missing blob blocks")
expectCol := make(map[[32]byte]struct{})
for _, blk := range expectNeeds.cols {
expectCol[blk.Root()] = struct{}{}
}
for _, blk := range needs.cols {
_, ok := expectCol[blk.Root()]
require.Equal(t, true, ok, "unexpected col block root %#x", blk.Root())
delete(expectCol, blk.Root())
}
require.Equal(t, 0, len(expectCol), "missing col blocks")
})
}
}
func TestSafeRange(t *testing.T) {
cases := []struct {
name string
sr safeRange
err error
slice []int
expected []int
}{
{
name: "zero range",
sr: safeRange{},
slice: []int{0, 1, 2},
},
{
name: "valid range",
sr: safeRange{start: 1, end: 3},
expected: []int{1, 2},
slice: []int{0, 1, 2},
},
{
name: "start greater than end",
sr: safeRange{start: 3, end: 2},
err: errUnsafeRange,
slice: []int{0, 1, 2},
},
{
name: "end out of bounds",
sr: safeRange{start: 1, end: 5},
err: errUnsafeRange,
slice: []int{0, 1, 2},
},
{
name: "start out of bounds",
sr: safeRange{start: 5, end: 6},
err: errUnsafeRange,
slice: []int{0, 1, 2},
},
{
name: "no error for empty slice",
sr: safeRange{start: 6, end: 5},
slice: []int{},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
sub, err := subSlice(tc.slice, tc.sr)
if tc.err != nil {
require.ErrorIs(t, err, tc.err)
return
}
require.NoError(t, err)
require.Equal(t, len(tc.expected), len(sub))
for i := range tc.expected {
require.Equal(t, tc.expected[i], sub[i])
}
})
}
}
func testDenebAndFuluSlots(t *testing.T) (primitives.Slot, primitives.Slot) {
params.SetupTestConfigCleanup(t)
denebEpoch := params.BeaconConfig().DenebForkEpoch
if params.BeaconConfig().FuluForkEpoch == params.BeaconConfig().FarFutureEpoch {
params.BeaconConfig().FuluForkEpoch = denebEpoch + 4096*2
}
fuluEpoch := params.BeaconConfig().FuluForkEpoch
fuluSlot, err := slots.EpochStart(fuluEpoch)
require.NoError(t, err)
denebSlot, err := slots.EpochStart(denebEpoch)
require.NoError(t, err)
return denebSlot, fuluSlot
}

View File

@@ -1,5 +1,115 @@
package backfill
import "github.com/sirupsen/logrus"
import (
"sync"
"sync/atomic"
"time"
"github.com/sirupsen/logrus"
)
var log = logrus.WithField("prefix", "backfill")
// intervalLogger logs at most once per interval. It customizes a single
// entry/logger instance and should only be used to control the logging rate for
// *one specific line of code*.
type intervalLogger struct {
*logrus.Entry
base *logrus.Entry
mux sync.Mutex
seconds int64 // seconds is the number of seconds per logging interval
last *atomic.Int64 // last is the quantized representation of the last time a log was emitted
now func() time.Time
}
func newIntervalLogger(base *logrus.Entry, secondsBetweenLogs int64) *intervalLogger {
return &intervalLogger{
Entry: base,
base: base,
seconds: secondsBetweenLogs,
last: new(atomic.Int64),
now: time.Now,
}
}
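// A minimal usage sketch (mirroring peerFailLogger in the worker pool): wrap the package
// logger once and reuse the wrapper at a single noisy call site.
//
//    failLog := newIntervalLogger(log, 5)
//    // inside a hot loop:
//    failLog.WithField("batch", id).Error("Failed to select peer") // at most once per 5s
//
// WithField/WithFields/WithError return copies that share the same last counter, so the
// rate limit applies to the call site as a whole rather than to each copy separately.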
// intervalNumber is factored out as a separate pure function so that tests can
// verify proper timestamp alignment.
func intervalNumber(t time.Time, seconds int64) int64 {
return t.Unix() / seconds
}
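// For example, with seconds = 10, unix timestamps 1700000000 through 1700000009 all map
// to interval 170000000, while 1700000010 begins interval 170000001, so at most one log
// line is emitted per 10-second bucket.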
// intervalNumber returns the current unix timestamp integer-divided by the
// number of seconds per interval.
func (l *intervalLogger) intervalNumber() int64 {
return intervalNumber(l.now(), l.seconds)
}
func (l *intervalLogger) copy() *intervalLogger {
return &intervalLogger{
Entry: l.Entry,
base: l.base,
seconds: l.seconds,
last: l.last,
now: l.now,
}
}
// Log overloads the Log() method of logrus.Entry, which is called under the hood
// when a log-level specific method (like Info(), Warn(), Error()) is invoked.
// By intercepting this call we can rate limit how often we log.
func (l *intervalLogger) Log(level logrus.Level, args ...interface{}) {
n := l.intervalNumber()
// If Swap returns a different value than the current interval number, we haven't
// emitted a log yet this interval, so we can do so now.
if l.last.Swap(n) != n {
l.Entry.Log(level, args...)
}
// Fields added via WithField/WithFields/WithError do not persist across calls
// to Log() because those methods return copies rather than mutating this logger.
}
func (l *intervalLogger) WithField(key string, value interface{}) *intervalLogger {
cp := l.copy()
cp.Entry = cp.Entry.WithField(key, value)
return cp
}
func (l *intervalLogger) WithFields(fields logrus.Fields) *intervalLogger {
cp := l.copy()
cp.Entry = cp.Entry.WithFields(fields)
return cp
}
func (l *intervalLogger) WithError(err error) *intervalLogger {
cp := l.copy()
cp.Entry = cp.Entry.WithError(err)
return cp
}
func (l *intervalLogger) Trace(args ...interface{}) {
l.Log(logrus.TraceLevel, args...)
}
func (l *intervalLogger) Debug(args ...interface{}) {
l.Log(logrus.DebugLevel, args...)
}
func (l *intervalLogger) Print(args ...interface{}) {
l.Info(args...)
}
func (l *intervalLogger) Info(args ...interface{}) {
l.Log(logrus.InfoLevel, args...)
}
func (l *intervalLogger) Warn(args ...interface{}) {
l.Log(logrus.WarnLevel, args...)
}
func (l *intervalLogger) Warning(args ...interface{}) {
l.Warn(args...)
}
func (l *intervalLogger) Error(args ...interface{}) {
l.Log(logrus.ErrorLevel, args...)
}

View File

@@ -0,0 +1,379 @@
package backfill
import (
"bytes"
"sync"
"testing"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
)
// trackingHook is a logrus hook that records emitted entries so tests can count Log calls.
type trackingHook struct {
mu sync.RWMutex
entries []*logrus.Entry
}
func (h *trackingHook) Levels() []logrus.Level {
return logrus.AllLevels
}
func (h *trackingHook) Fire(entry *logrus.Entry) error {
h.mu.Lock()
defer h.mu.Unlock()
h.entries = append(h.entries, entry)
return nil
}
func (h *trackingHook) callCount() int {
h.mu.RLock()
defer h.mu.RUnlock()
return len(h.entries)
}
func (h *trackingHook) emitted(t *testing.T) []string {
h.mu.RLock()
defer h.mu.RUnlock()
e := make([]string, len(h.entries))
for i, entry := range h.entries {
entry.Buffer = new(bytes.Buffer)
serialized, err := entry.Logger.Formatter.Format(entry)
require.NoError(t, err)
e[i] = string(serialized)
}
return e
}
func entryWithHook() (*logrus.Entry, *trackingHook) {
logger := logrus.New()
logger.SetLevel(logrus.TraceLevel)
hook := &trackingHook{}
logger.AddHook(hook)
entry := logrus.NewEntry(logger)
return entry, hook
}
func intervalSecondsAndDuration(i int) (int64, time.Duration) {
return int64(i), time.Duration(i) * time.Second
}
// mockClock provides a controllable time source for testing.
// It allows tests to set the current time and advance it as needed.
type mockClock struct {
t time.Time
}
// now returns the current time.
func (c *mockClock) now() time.Time {
return c.t
}
func setupMockClock(il *intervalLogger) *mockClock {
// initialize now so that the time aligns with the start of the
// interval bucket. This ensures that adding less than an interval
// of time to the timestamp can never move into the next bucket.
interval := intervalNumber(time.Now(), il.seconds)
now := time.Unix(interval*il.seconds, 0)
clock := &mockClock{t: now}
il.now = clock.now
return clock
}
// TestNewIntervalLogger verifies logger is properly initialized
func TestNewIntervalLogger(t *testing.T) {
base := logrus.NewEntry(logrus.New())
intSec := int64(10)
il := newIntervalLogger(base, intSec)
require.NotNil(t, il)
require.Equal(t, intSec, il.seconds)
require.Equal(t, int64(0), il.last.Load())
require.Equal(t, base, il.Entry)
}
// TestLogOncePerInterval verifies that Log is called only once within an interval window
func TestLogOncePerInterval(t *testing.T) {
entry, hook := entryWithHook()
il := newIntervalLogger(entry, 10)
_ = setupMockClock(il) // use a fixed time to make sure no race is possible
// First log should call the underlying Log method
il.Log(logrus.InfoLevel, "test message 1")
require.Equal(t, 1, hook.callCount())
// Second log in same interval should not call Log
il.Log(logrus.InfoLevel, "test message 2")
require.Equal(t, 1, hook.callCount())
// Third log still in same interval should not call Log
il.Log(logrus.InfoLevel, "test message 3")
require.Equal(t, 1, hook.callCount())
// Verify last is set to current interval
require.Equal(t, il.intervalNumber(), il.last.Load())
}
// TestLogAcrossIntervalBoundary verifies logging at interval boundaries resets correctly
func TestLogAcrossIntervalBoundary(t *testing.T) {
iSec, iDur := intervalSecondsAndDuration(10)
entry, hook := entryWithHook()
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
il.Log(logrus.InfoLevel, "first interval")
require.Equal(t, 1, hook.callCount())
// Log in new interval should succeed
clock.t = clock.t.Add(2 * iDur)
il.Log(logrus.InfoLevel, "second interval")
require.Equal(t, 2, hook.callCount())
}
// TestWithFieldChaining verifies WithField returns logger and can be chained
func TestWithFieldChaining(t *testing.T) {
entry, hook := entryWithHook()
iSec, iDur := intervalSecondsAndDuration(10)
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
result := il.WithField("key1", "value1")
require.NotNil(t, result)
result.Info("test")
require.Equal(t, 1, hook.callCount())
// make sure there was no mutation of the base as a side effect
clock.t = clock.t.Add(iDur)
il.Info("another")
// Verify field is present in logged entry
emitted := hook.emitted(t)
require.Contains(t, emitted[0], "test")
require.Contains(t, emitted[0], "key1=value1")
require.Contains(t, emitted[1], "another")
require.NotContains(t, emitted[1], "key1=value1")
}
// TestWithFieldsChaining verifies WithFields properly adds multiple fields
func TestWithFieldsChaining(t *testing.T) {
entry, hook := entryWithHook()
iSec, iDur := intervalSecondsAndDuration(10)
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
fields := logrus.Fields{
"key1": "value1",
"key2": "value2",
}
result := il.WithFields(fields)
require.NotNil(t, result)
result.Info("test")
require.Equal(t, 1, hook.callCount())
// make sure there was no mutation of the base as a side effect
clock.t = clock.t.Add(iDur)
il.Info("another")
// Verify field is present in logged entry
emitted := hook.emitted(t)
require.Contains(t, emitted[0], "test")
require.Contains(t, emitted[0], "key1=value1")
require.Contains(t, emitted[0], "key2=value2")
require.Contains(t, emitted[1], "another")
require.NotContains(t, emitted[1], "key1=value1")
require.NotContains(t, emitted[1], "key2=value2")
}
// TestWithErrorChaining verifies WithError properly adds error field
func TestWithErrorChaining(t *testing.T) {
entry, hook := entryWithHook()
iSec, iDur := intervalSecondsAndDuration(10)
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
expected := errors.New("lowercase words")
result := il.WithError(expected)
require.NotNil(t, result)
result.Error("test")
require.Equal(t, 1, hook.callCount())
require.NotNil(t, result)
// make sure there was no mutation of the base as a side effect
clock.t = clock.t.Add(iDur)
il.Info("different")
// Verify field is present in logged entry
emitted := hook.emitted(t)
require.Contains(t, emitted[0], expected.Error())
require.Contains(t, emitted[0], "test")
require.Contains(t, emitted[1], "different")
require.NotContains(t, emitted[1], "test")
require.NotContains(t, emitted[1], "lowercase words")
}
// TestLogLevelMethods verifies all log level methods work and respect rate limiting
func TestLogLevelMethods(t *testing.T) {
entry, hook := entryWithHook()
il := newIntervalLogger(entry, 10)
_ = setupMockClock(il) // use a fixed time to make sure no race is possible
// First call from each level-specific method should succeed
il.Trace("trace message")
require.Equal(t, 1, hook.callCount())
// Subsequent calls in the same interval should be suppressed
il.Debug("debug message")
require.Equal(t, 1, hook.callCount())
il.Info("info message")
require.Equal(t, 1, hook.callCount())
il.Print("print message")
require.Equal(t, 1, hook.callCount())
il.Warn("warn message")
require.Equal(t, 1, hook.callCount())
il.Warning("warning message")
require.Equal(t, 1, hook.callCount())
il.Error("error message")
require.Equal(t, 1, hook.callCount())
}
// TestConcurrentLogging verifies multiple goroutines can safely call Log concurrently
func TestConcurrentLogging(t *testing.T) {
entry, hook := entryWithHook()
il := newIntervalLogger(entry, 10)
_ = setupMockClock(il) // use a fixed time to make sure no race is possible
var wg sync.WaitGroup
wait := make(chan struct{})
for i := 0; i < 10; i++ {
wg.Add(1)
go func() {
<-wait
defer wg.Done()
il.Log(logrus.InfoLevel, "concurrent message")
}()
}
close(wait) // maximize raciness by unblocking goroutines together
wg.Wait()
// Only one Log call should succeed across all goroutines in the same interval
require.Equal(t, 1, hook.callCount())
}
// TestZeroInterval verifies behavior with small interval (logs every second)
func TestZeroInterval(t *testing.T) {
entry, hook := entryWithHook()
il := newIntervalLogger(entry, 1)
clock := setupMockClock(il)
il.Log(logrus.InfoLevel, "first")
require.Equal(t, 1, hook.callCount())
// Move to next second
clock.t = clock.t.Add(time.Second)
il.Log(logrus.InfoLevel, "second")
require.Equal(t, 2, hook.callCount())
}
// TestCompleteLoggingFlow tests realistic scenario with repeated logging
func TestCompleteLoggingFlow(t *testing.T) {
entry, hook := entryWithHook()
iSec, iDur := intervalSecondsAndDuration(10)
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
// Add field
il = il.WithField("request_id", "12345")
// Log multiple times in same interval - only first succeeds
il.Info("message 1")
require.Equal(t, 1, hook.callCount())
il.Warn("message 2")
require.Equal(t, 1, hook.callCount())
// Move to next interval
clock.t = clock.t.Add(iDur)
// Should be able to log again in new interval
il.Error("message 3")
require.Equal(t, 2, hook.callCount())
require.NotNil(t, il)
}
// TestAtomicSwapCorrectness verifies atomic swap works correctly
func TestAtomicSwapCorrectness(t *testing.T) {
il := newIntervalLogger(logrus.NewEntry(logrus.New()), 10)
_ = setupMockClock(il) // use a fixed time to make sure no race is possible
// Swap operation should return different value on first call
current := il.intervalNumber()
old := il.last.Swap(current)
require.Equal(t, int64(0), old) // initial value is 0
require.Equal(t, current, il.last.Load())
// Swap with same value should return the same value
old = il.last.Swap(current)
require.Equal(t, current, old)
}
// TestLogMethodsWithClockAdvancement verifies that log methods respect rate limiting
// within an interval but emit again after the interval passes.
func TestLogMethodsWithClockAdvancement(t *testing.T) {
entry, hook := entryWithHook()
iSec, iDur := intervalSecondsAndDuration(10)
il := newIntervalLogger(entry, iSec)
clock := setupMockClock(il)
// First Error call should log
il.Error("error 1")
require.Equal(t, 1, hook.callCount())
// Warn call in same interval should be suppressed
il.Warn("warn 1")
require.Equal(t, 1, hook.callCount())
// Info call in same interval should be suppressed
il.Info("info 1")
require.Equal(t, 1, hook.callCount())
// Debug call in same interval should be suppressed
il.Debug("debug 1")
require.Equal(t, 1, hook.callCount())
// Move forward 5 seconds - still in same 10-second interval
require.Equal(t, 5*time.Second, iDur/2)
clock.t = clock.t.Add(iDur / 2)
il.Error("error 2")
require.Equal(t, 1, hook.callCount(), "should still be suppressed within same interval")
firstInterval := il.intervalNumber()
// Move forward to next interval (10 second interval boundary)
clock.t = clock.t.Add(iDur / 2)
nextInterval := il.intervalNumber()
require.NotEqual(t, firstInterval, nextInterval, "should be in new interval now")
il.Error("error 3")
require.Equal(t, 2, hook.callCount(), "should emit in new interval")
// Another call in the new interval should be suppressed
il.Warn("warn 2")
require.Equal(t, 2, hook.callCount())
// Move forward to yet another interval
clock.t = clock.t.Add(iDur)
il.Info("info 2")
require.Equal(t, 3, hook.callCount(), "should emit in third interval")
}

View File

@@ -21,40 +21,24 @@ var (
Help: "Number of batches that are ready to be imported once they can be connected to the existing chain.",
},
)
backfillRemainingBatches = promauto.NewGauge(
batchesRemaining = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "backfill_remaining_batches",
Help: "Backfill remaining batches.",
},
)
backfillBatchesImported = promauto.NewCounter(
batchesImported = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_batches_imported",
Help: "Number of backfill batches downloaded and imported.",
},
)
backfillBlocksApproximateBytes = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blocks_bytes_downloaded",
Help: "BeaconBlock bytes downloaded from peers for backfill.",
},
)
backfillBlobsApproximateBytes = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blobs_bytes_downloaded",
Help: "BlobSidecar bytes downloaded from peers for backfill.",
},
)
backfillBlobsDownloadCount = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blobs_download_count",
Help: "Number of BlobSidecar values downloaded from peers for backfill.",
},
)
backfillBlocksDownloadCount = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blocks_download_count",
Help: "Number of BeaconBlock values downloaded from peers for backfill.",
backfillBatchTimeWaiting = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_time_waiting",
Help: "Time batch waited for a suitable peer.",
Buckets: []float64{50, 100, 300, 1000, 2000},
},
)
backfillBatchTimeRoundtrip = promauto.NewHistogram(
@@ -64,43 +48,90 @@ var (
Buckets: []float64{400, 800, 1600, 3200, 6400, 12800},
},
)
backfillBatchTimeWaiting = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_time_waiting",
Help: "Time batch waited for a suitable peer.",
Buckets: []float64{50, 100, 300, 1000, 2000},
blockDownloadCount = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blocks_download_count",
Help: "Number of BeaconBlock values downloaded from peers for backfill.",
},
)
backfillBatchTimeDownloadingBlocks = promauto.NewHistogram(
blockDownloadBytesApprox = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blocks_bytes_downloaded",
Help: "BeaconBlock bytes downloaded from peers for backfill.",
},
)
blockDownloadMs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_blocks_time_download",
Help: "Time, in milliseconds, batch spent downloading blocks from peer.",
Help: "BeaconBlock download time, in ms.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
backfillBatchTimeDownloadingBlobs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_blobs_time_download",
Help: "Time, in milliseconds, batch spent downloading blobs from peer.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
backfillBatchTimeVerifying = promauto.NewHistogram(
blockVerifyMs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_time_verify",
Help: "Time batch spent downloading blocks from peer.",
Help: "BeaconBlock verification time, in ms.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
blobSidecarDownloadCount = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blobs_download_count",
Help: "Number of BlobSidecar values downloaded from peers for backfill.",
},
)
blobSidecarDownloadBytesApprox = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_blobs_bytes_downloaded",
Help: "BlobSidecar bytes downloaded from peers for backfill.",
},
)
blobSidecarDownloadMs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_blobs_time_download",
Help: "BlobSidecar download time, in ms.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
dataColumnSidecarDownloadCount = promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "backfill_data_column_sidecar_downloaded",
Help: "Number of DataColumnSidecar values downloaded from peers for backfill.",
},
[]string{"index", "validity"},
)
dataColumnSidecarDownloadBytes = promauto.NewCounter(
prometheus.CounterOpts{
Name: "backfill_data_column_sidecar_bytes_downloaded",
Help: "DataColumnSidecar bytes downloaded from peers for backfill.",
},
)
dataColumnSidecarDownloadMs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_columns_time_download",
Help: "DataColumnSidecars download time, in ms.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
dataColumnSidecarVerifyMs = promauto.NewHistogram(
prometheus.HistogramOpts{
Name: "backfill_batch_columns_time_verify",
Help: "DataColumnSidecars verification time, in ms.",
Buckets: []float64{100, 300, 1000, 2000, 4000, 8000},
},
)
)
func blobValidationMetrics(_ blocks.ROBlob) error {
backfillBlobsDownloadCount.Inc()
blobSidecarDownloadCount.Inc()
return nil
}
func blockValidationMetrics(interfaces.ReadOnlySignedBeaconBlock) error {
backfillBlocksDownloadCount.Inc()
blockDownloadCount.Inc()
return nil
}

View File

@@ -2,22 +2,21 @@ package backfill
import (
"context"
"math"
"time"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/peers"
"github.com/OffchainLabs/prysm/v7/beacon-chain/startup"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/time/slots"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
type batchWorkerPool interface {
spawn(ctx context.Context, n int, clock *startup.Clock, a PeerAssigner, v *verifier, cm sync.ContextByteVersions, blobVerifier verification.NewBlobVerifier, bfs *filesystem.BlobStorage)
spawn(ctx context.Context, n int, a PeerAssigner, cfg *workerCfg)
todo(b batch)
complete() (batch, error)
}
@@ -26,25 +25,35 @@ type worker interface {
run(context.Context)
}
type newWorker func(id workerId, in, out chan batch, c *startup.Clock, v *verifier, cm sync.ContextByteVersions, nbv verification.NewBlobVerifier, bfs *filesystem.BlobStorage) worker
type newWorker func(id workerId, in, out chan batch, cfg *workerCfg) worker
func defaultNewWorker(p p2p.P2P) newWorker {
return func(id workerId, in, out chan batch, c *startup.Clock, v *verifier, cm sync.ContextByteVersions, nbv verification.NewBlobVerifier, bfs *filesystem.BlobStorage) worker {
return newP2pWorker(id, p, in, out, c, v, cm, nbv, bfs)
return func(id workerId, in, out chan batch, cfg *workerCfg) worker {
return newP2pWorker(id, p, in, out, cfg)
}
}
// minReqInterval is the minimum amount of time between requests to a given peer,
// i.e. a value of 1s means we'll make ~1 req/sec per peer.
const (
minReqInterval = time.Second
)
type p2pBatchWorkerPool struct {
maxBatches int
newWorker newWorker
toWorkers chan batch
fromWorkers chan batch
toRouter chan batch
fromRouter chan batch
shutdownErr chan error
endSeq []batch
ctx context.Context
cancel func()
maxBatches int
newWorker newWorker
toWorkers chan batch
fromWorkers chan batch
toRouter chan batch
fromRouter chan batch
shutdownErr chan error
endSeq []batch
ctx context.Context
cancel func()
earliest primitives.Slot
peerCache *sync.DASPeerCache
p2p p2p.P2P
peerFailLogger *intervalLogger
}
var _ batchWorkerPool = &p2pBatchWorkerPool{}
@@ -52,21 +61,24 @@ var _ batchWorkerPool = &p2pBatchWorkerPool{}
func newP2PBatchWorkerPool(p p2p.P2P, maxBatches int) *p2pBatchWorkerPool {
nw := defaultNewWorker(p)
return &p2pBatchWorkerPool{
newWorker: nw,
toRouter: make(chan batch, maxBatches),
fromRouter: make(chan batch, maxBatches),
toWorkers: make(chan batch),
fromWorkers: make(chan batch),
maxBatches: maxBatches,
shutdownErr: make(chan error),
newWorker: nw,
toRouter: make(chan batch, maxBatches),
fromRouter: make(chan batch, maxBatches),
toWorkers: make(chan batch),
fromWorkers: make(chan batch),
maxBatches: maxBatches,
shutdownErr: make(chan error),
peerCache: sync.NewDASPeerCache(p),
p2p: p,
peerFailLogger: newIntervalLogger(log, 5),
}
}
func (p *p2pBatchWorkerPool) spawn(ctx context.Context, n int, c *startup.Clock, a PeerAssigner, v *verifier, cm sync.ContextByteVersions, nbv verification.NewBlobVerifier, bfs *filesystem.BlobStorage) {
func (p *p2pBatchWorkerPool) spawn(ctx context.Context, n int, a PeerAssigner, cfg *workerCfg) {
p.ctx, p.cancel = context.WithCancel(ctx)
go p.batchRouter(a)
for i := 0; i < n; i++ {
go p.newWorker(workerId(i), p.toWorkers, p.fromWorkers, c, v, cm, nbv, bfs).run(p.ctx)
go p.newWorker(workerId(i), p.toWorkers, p.fromWorkers, cfg).run(p.ctx)
}
}
@@ -103,7 +115,6 @@ func (p *p2pBatchWorkerPool) batchRouter(pa PeerAssigner) {
busy := make(map[peer.ID]bool)
todo := make([]batch, 0)
rt := time.NewTicker(time.Second)
earliest := primitives.Slot(math.MaxUint64)
for {
select {
case b := <-p.toRouter:
@@ -115,51 +126,125 @@ func (p *p2pBatchWorkerPool) batchRouter(pa PeerAssigner) {
// This ticker exists to periodically break out of the channel select
// to retry failed assignments.
case b := <-p.fromWorkers:
pid := b.busy
busy[pid] = false
if b.state == batchBlobSync {
todo = append(todo, b)
sortBatchDesc(todo)
} else {
p.fromRouter <- b
if b.state == batchErrFatal {
p.shutdown(b.err)
}
pid := b.assignedPeer
delete(busy, pid)
if b.workComplete() {
p.fromRouter <- b
break
}
todo = append(todo, b)
sortBatchDesc(todo)
case <-p.ctx.Done():
log.WithError(p.ctx.Err()).Info("p2pBatchWorkerPool context canceled, shutting down")
p.shutdown(p.ctx.Err())
return
}
if len(todo) == 0 {
continue
}
// Try to assign as many outstanding batches as possible to peers and feed the assigned batches to workers.
assigned, err := pa.Assign(busy, len(todo))
var err error
todo, err = p.processTodo(todo, pa, busy)
if err != nil {
if errors.Is(err, peers.ErrInsufficientSuitable) {
// Transient error resulting from insufficient number of connected peers. Leave batches in
// queue and get to them whenever the peer situation is resolved.
continue
}
p.shutdown(err)
return
}
for _, pid := range assigned {
if err := todo[0].waitUntilReady(p.ctx); err != nil {
log.WithError(p.ctx.Err()).Info("p2pBatchWorkerPool context canceled, shutting down")
p.shutdown(p.ctx.Err())
return
}
busy[pid] = true
todo[0].busy = pid
p.toWorkers <- todo[0].withPeer(pid)
if todo[0].begin < earliest {
earliest = todo[0].begin
oldestBatch.Set(float64(earliest))
}
todo = todo[1:]
}
}
}
func (p *p2pBatchWorkerPool) processTodo(todo []batch, pa PeerAssigner, busy map[peer.ID]bool) ([]batch, error) {
if len(todo) == 0 {
return todo, nil
}
notBusy, err := pa.Assign(peers.NotBusy(busy))
if err != nil {
if errors.Is(err, peers.ErrInsufficientSuitable) {
// Transient error resulting from insufficient number of connected peers. Leave batches in
// queue and get to them whenever the peer situation is resolved.
return todo, nil
}
return nil, err
}
if len(notBusy) == 0 {
log.Warn("No suitable peers available for batch assignment")
return todo, nil
}
custodied := peerdas.NewColumnIndices()
if highestEpoch(todo) >= params.BeaconConfig().FuluForkEpoch {
custodied, err = currentCustodiedColumns(p.ctx, p.p2p)
if err != nil {
return nil, errors.Wrap(err, "current custodied columns")
}
}
picker, err := p.peerCache.NewPicker(notBusy, custodied, minReqInterval)
if err != nil {
log.WithError(err).Error("Failed to compute column-weighted peer scores")
return todo, nil
}
for i, b := range todo {
excludePeers := busy
if b.state == batchErrFatal {
// Fatal error detected in batch, shut down the pool.
return nil, b.err
}
if b.state == batchErrRetryable {
// Columns can fail in a partial fashion, so we need to reset
// components that track peer interactions for multiple columns
// to enable partial retries.
b = resetRetryableColumns(b)
// Set the next correct state after retryable error
b = b.transitionToNext()
if b.state == batchSequenced {
// Transitioning to batchSequenced means we need to download a new block batch because there was
// a problem making or verifying the last block request, so we should try to pick a different peer this time.
excludePeers = busyCopy(busy)
excludePeers[b.blockPeer] = true
b.blockPeer = "" // reset block peer so we can fail back to it next time if there is an issue with assignment.
}
}
pid, cols, err := b.selectPeer(picker, excludePeers)
if err != nil {
p.peerFailLogger.WithField("notBusy", len(notBusy)).WithError(err).WithFields(b.logFields()).Error("Failed to select peer for batch")
// Return the remaining todo items and allow the outer loop to control when we try again.
return todo[i:], nil
}
busy[pid] = true
b.assignedPeer = pid
b.nextReqCols = cols
backfillBatchTimeWaiting.Observe(float64(time.Since(b.scheduled).Milliseconds()))
p.toWorkers <- b
p.updateEarliest(b.begin)
}
return []batch{}, nil
}
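// busyCopy returns a shallow copy of the busy map so that temporarily excluding a peer
// for one retried batch does not mark that peer as busy for every other batch in the
// same scheduling pass.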
func busyCopy(busy map[peer.ID]bool) map[peer.ID]bool {
busyCp := make(map[peer.ID]bool, len(busy))
for k, v := range busy {
busyCp[k] = v
}
return busyCp
}
func highestEpoch(batches []batch) primitives.Epoch {
highest := primitives.Epoch(0)
for _, b := range batches {
epoch := slots.ToEpoch(b.end - 1)
if epoch > highest {
highest = epoch
}
}
return highest
}
func (p *p2pBatchWorkerPool) updateEarliest(current primitives.Slot) {
if current >= p.earliest {
return
}
p.earliest = current
oldestBatch.Set(float64(p.earliest))
}
func (p *p2pBatchWorkerPool) shutdown(err error) {
p.cancel()
p.shutdownErr <- err

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/peers"
p2ptest "github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/startup"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
@@ -24,7 +25,7 @@ type mockAssigner struct {
// Assign satisfies the PeerAssigner interface so that mockAssigner can be used in tests
// in place of the concrete p2p implementation of PeerAssigner.
func (m mockAssigner) Assign(busy map[peer.ID]bool, n int) ([]peer.ID, error) {
func (m mockAssigner) Assign(filter peers.AssignmentFilter) ([]peer.ID, error) {
if m.err != nil {
return nil, m.err
}
@@ -53,7 +54,8 @@ func TestPoolDetectAllEnded(t *testing.T) {
ctxMap, err := sync.ContextByteVersionsForValRoot(bytesutil.ToBytes32(st.GenesisValidatorsRoot()))
require.NoError(t, err)
bfs := filesystem.NewEphemeralBlobStorage(t)
pool.spawn(ctx, nw, startup.NewClock(time.Now(), [32]byte{}), ma, v, ctxMap, mockNewBlobVerifier, bfs)
wcfg := &workerCfg{clock: startup.NewClock(time.Now(), [32]byte{}), newVB: mockNewBlobVerifier, verifier: v, ctxMap: ctxMap, blobStore: bfs}
pool.spawn(ctx, nw, ma, wcfg)
br := batcher{min: 10, size: 10}
endSeq := br.before(0)
require.Equal(t, batchEndSequence, endSeq.state)
@@ -72,7 +74,7 @@ type mockPool struct {
todoChan chan batch
}
func (m *mockPool) spawn(_ context.Context, _ int, _ *startup.Clock, _ PeerAssigner, _ *verifier, _ sync.ContextByteVersions, _ verification.NewBlobVerifier, _ *filesystem.BlobStorage) {
func (m *mockPool) spawn(_ context.Context, _ int, _ PeerAssigner, _ *workerCfg) {
}
func (m *mockPool) todo(b batch) {

View File

@@ -5,8 +5,8 @@ import (
"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p/peers"
"github.com/OffchainLabs/prysm/v7/beacon-chain/startup"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
@@ -21,26 +21,27 @@ import (
)
type Service struct {
ctx context.Context
enabled bool // service is disabled by default
clock *startup.Clock
store *Store
ms minimumSlotter
cw startup.ClockWaiter
verifierWaiter InitializerWaiter
newBlobVerifier verification.NewBlobVerifier
nWorkers int
batchSeq *batchSequencer
batchSize uint64
pool batchWorkerPool
verifier *verifier
ctxMap sync.ContextByteVersions
p2p p2p.P2P
pa PeerAssigner
batchImporter batchImporter
blobStore *filesystem.BlobStorage
initSyncWaiter func() error
complete chan struct{}
ctx context.Context
enabled bool // service is disabled by default
clock *startup.Clock
store *Store
ms minimumSlotter
cw startup.ClockWaiter
verifierWaiter InitializerWaiter
nWorkers int
batchSeq *batchSequencer
batchSize uint64
pool batchWorkerPool
p2p p2p.P2P
pa PeerAssigner
batchImporter batchImporter
blobStore *filesystem.BlobStorage
dcStore *filesystem.DataColumnStorage
initSyncWaiter func() error
complete chan struct{}
workerCfg *workerCfg
fuluStart primitives.Slot
denebStart primitives.Slot
}
var _ runtime.Service = (*Service)(nil)
@@ -49,23 +50,12 @@ var _ runtime.Service = (*Service)(nil)
// to service an RPC blockRequest. The Assign method takes a map of peers that should be excluded,
// allowing the caller to avoid making multiple concurrent requests to the same peer.
type PeerAssigner interface {
Assign(busy map[peer.ID]bool, n int) ([]peer.ID, error)
Assign(filter peers.AssignmentFilter) ([]peer.ID, error)
}
type minimumSlotter func(primitives.Slot) primitives.Slot
type batchImporter func(ctx context.Context, current primitives.Slot, b batch, su *Store) (*dbval.BackfillStatus, error)
func defaultBatchImporter(ctx context.Context, current primitives.Slot, b batch, su *Store) (*dbval.BackfillStatus, error) {
status := su.status()
if err := b.ensureParent(bytesutil.ToBytes32(status.LowParentRoot)); err != nil {
return status, err
}
// Import blocks to db and update db state to reflect the newly imported blocks.
// Other parts of the beacon node may use the same StatusUpdater instance
// via the coverage.AvailableBlocker interface to safely determine if a given slot has been backfilled.
return su.fillBack(ctx, current, b.results, b.availabilityStore())
}
// ServiceOption represents a functional option for the backfill service constructor.
type ServiceOption func(*Service) error
@@ -140,46 +130,32 @@ func WithMinimumSlot(s primitives.Slot) ServiceOption {
// NewService initializes the backfill Service. Like all implementations of the Service interface,
// the service won't begin its runloop until Start() is called.
func NewService(ctx context.Context, su *Store, bStore *filesystem.BlobStorage, cw startup.ClockWaiter, p p2p.P2P, pa PeerAssigner, opts ...ServiceOption) (*Service, error) {
func NewService(ctx context.Context, su *Store, bStore *filesystem.BlobStorage, dcStore *filesystem.DataColumnStorage, cw startup.ClockWaiter, p p2p.P2P, pa PeerAssigner, opts ...ServiceOption) (*Service, error) {
s := &Service{
ctx: ctx,
store: su,
blobStore: bStore,
cw: cw,
ms: minimumBackfillSlot,
p2p: p,
pa: pa,
batchImporter: defaultBatchImporter,
complete: make(chan struct{}),
ctx: ctx,
store: su,
blobStore: bStore,
dcStore: dcStore,
cw: cw,
ms: minimumBackfillSlot,
p2p: p,
pa: pa,
complete: make(chan struct{}),
fuluStart: slots.SafeEpochStartOrMax(params.BeaconConfig().FuluForkEpoch),
denebStart: slots.SafeEpochStartOrMax(params.BeaconConfig().DenebForkEpoch),
}
s.batchImporter = s.defaultBatchImporter
for _, o := range opts {
if err := o(s); err != nil {
return nil, err
}
}
s.pool = newP2PBatchWorkerPool(p, s.nWorkers)
return s, nil
}
func (s *Service) initVerifier(ctx context.Context) (*verifier, sync.ContextByteVersions, error) {
cps, err := s.store.originState(ctx)
if err != nil {
return nil, nil, err
}
keys, err := cps.PublicKeys()
if err != nil {
return nil, nil, errors.Wrap(err, "unable to retrieve public keys for all validators in the origin state")
}
vr := cps.GenesisValidatorsRoot()
ctxMap, err := sync.ContextByteVersionsForValRoot(bytesutil.ToBytes32(vr))
if err != nil {
return nil, nil, errors.Wrapf(err, "unable to initialize context version map using genesis validator root %#x", vr)
}
v, err := newBackfillVerifier(vr, keys)
return v, ctxMap, err
}
func (s *Service) updateComplete() bool {
b, err := s.pool.complete()
if err != nil {
@@ -201,19 +177,18 @@ func (s *Service) importBatches(ctx context.Context) {
if imported == 0 {
return
}
backfillBatchesImported.Add(float64(imported))
batchesImported.Add(float64(imported))
}()
current := s.clock.CurrentSlot()
for i := range importable {
ib := importable[i]
if len(ib.results) == 0 {
if len(ib.blocks) == 0 {
log.WithFields(ib.logFields()).Error("Batch with no results, skipping importer")
}
_, err := s.batchImporter(ctx, current, ib, s.store)
if err != nil {
log.WithError(err).WithFields(ib.logFields()).Debug("Backfill batch failed to import")
s.downscorePeer(ib.blockPid, "backfillBatchImportError")
s.batchSeq.update(ib.withState(batchErrRetryable))
s.batchSeq.update(ib.withError(err))
// If a batch fails, the subsequent batches are no longer considered importable.
break
}
@@ -227,7 +202,20 @@ func (s *Service) importBatches(ctx context.Context) {
WithField("batchesRemaining", nt).
Info("Backfill batches processed")
backfillRemainingBatches.Set(float64(nt))
batchesRemaining.Set(float64(nt))
}
func (s *Service) defaultBatchImporter(ctx context.Context, current primitives.Slot, b batch, su *Store) (*dbval.BackfillStatus, error) {
status := su.status()
if err := b.ensureParent(bytesutil.ToBytes32(status.LowParentRoot)); err != nil {
return status, err
}
// Import blocks to db and update db state to reflect the newly imported blocks.
// Other parts of the beacon node may use the same StatusUpdater instance
// via the coverage.AvailableBlocker interface to safely determine if a given slot has been backfilled.
checker := newCheckMultiplexer(s.fuluStart, s.denebStart, b)
return su.fillBack(ctx, current, b.blocks, checker)
}
func (s *Service) scheduleTodos() {
@@ -249,18 +237,6 @@ func (s *Service) scheduleTodos() {
}
}
// fuluOrigin checks whether the origin block (ie the checkpoint sync block from which backfill
// syncs backwards) is in an unsupported fork, enabling the backfill service to shut down rather than
// run with buggy behavior.
// This will be removed once DataColumnSidecar support is released.
func fuluOrigin(cfg *params.BeaconChainConfig, status *dbval.BackfillStatus) bool {
originEpoch := slots.ToEpoch(primitives.Slot(status.OriginSlot))
if originEpoch < cfg.FuluForkEpoch {
return false
}
return true
}
// Start begins the runloop of backfill.Service in the current goroutine.
func (s *Service) Start() {
if !s.enabled {
@@ -273,32 +249,20 @@ func (s *Service) Start() {
log.Info("Backfill service is shutting down")
cancel()
}()
clock, err := s.cw.WaitForClock(ctx)
if err != nil {
log.WithError(err).Error("Backfill service failed to start while waiting for genesis data")
return
}
s.clock = clock
v, err := s.verifierWaiter.WaitForInitializer(ctx)
s.newBlobVerifier = newBlobVerifierFromInitializer(v)
if err != nil {
log.WithError(err).Error("Could not initialize blob verifier in backfill service")
return
}
if s.store.isGenesisSync() {
log.Info("Backfill short-circuit; node synced from genesis")
s.markComplete()
return
}
status := s.store.status()
if fuluOrigin(params.BeaconConfig(), status) {
log.WithField("originSlot", s.store.status().OriginSlot).
Warn("backfill disabled; DataColumnSidecar currently unsupported, for updates follow https://github.com/OffchainLabs/prysm/issues/15982")
s.markComplete()
clock, err := s.cw.WaitForClock(ctx)
if err != nil {
log.WithError(err).Error("Backfill service failed to start while waiting for genesis data")
return
}
s.clock = clock
status := s.store.status()
// Exit early if there aren't going to be any batches to backfill.
if primitives.Slot(status.LowSlot) <= s.ms(s.clock.CurrentSlot()) {
log.WithField("minimumRequiredSlot", s.ms(s.clock.CurrentSlot())).
@@ -308,12 +272,6 @@ func (s *Service) Start() {
return
}
s.verifier, s.ctxMap, err = s.initVerifier(ctx)
if err != nil {
log.WithError(err).Error("Unable to initialize backfill verifier")
return
}
if s.initSyncWaiter != nil {
log.Info("Backfill service waiting for initial-sync to reach head before starting")
if err := s.initSyncWaiter(); err != nil {
@@ -321,7 +279,22 @@ func (s *Service) Start() {
return
}
}
s.pool.spawn(ctx, s.nWorkers, clock, s.pa, s.verifier, s.ctxMap, s.newBlobVerifier, s.blobStore)
if s.workerCfg == nil {
s.workerCfg = &workerCfg{
clock: s.clock,
blobStore: s.blobStore,
colStore: s.dcStore,
downscore: s.downscorePeer,
}
s.workerCfg, err = initWorkerCfg(ctx, s.workerCfg, s.verifierWaiter, s.store)
if err != nil {
log.WithError(err).Error("Could not initialize blob verifier in backfill service")
return
}
}
s.pool.spawn(ctx, s.nWorkers, s.pa, s.workerCfg)
s.batchSeq = newBatchSequencer(s.nWorkers, s.ms(s.clock.CurrentSlot()), primitives.Slot(status.LowSlot), primitives.Slot(s.batchSize))
if err = s.initBatches(); err != nil {
log.WithError(err).Error("Non-recoverable error in backfill service")
@@ -386,6 +359,12 @@ func newBlobVerifierFromInitializer(ini *verification.Initializer) verification.
}
}
func newDataColumnVerifierFromInitializer(ini *verification.Initializer) verification.NewDataColumnsVerifier {
return func(cols []blocks.RODataColumn, reqs []verification.Requirement) verification.DataColumnsVerifier {
return ini.NewDataColumnsVerifier(cols, reqs)
}
}
func (s *Service) markComplete() {
close(s.complete)
log.Info("Backfill service marked as complete")
@@ -400,7 +379,11 @@ func (s *Service) WaitForCompletion() error {
}
}
func (s *Service) downscorePeer(peerID peer.ID, reason string) {
func (s *Service) downscorePeer(peerID peer.ID, reason string, err error) {
newScore := s.p2p.Peers().Scorers().BadResponsesScorer().Increment(peerID)
log.WithFields(logrus.Fields{"peerID": peerID, "reason": reason, "newScore": newScore}).Debug("Downscore peer")
logArgs := log.WithFields(logrus.Fields{"peerID": peerID, "reason": reason, "newScore": newScore})
if err != nil {
logArgs = logArgs.WithError(err)
}
logArgs.Debug("Downscore peer")
}

View File

@@ -15,7 +15,6 @@ import (
"github.com/OffchainLabs/prysm/v7/proto/dbval"
"github.com/OffchainLabs/prysm/v7/testing/require"
"github.com/OffchainLabs/prysm/v7/testing/util"
"github.com/OffchainLabs/prysm/v7/time/slots"
)
type mockMinimumSlotter struct {
@@ -57,7 +56,8 @@ func TestServiceInit(t *testing.T) {
pool := &mockPool{todoChan: make(chan batch, nWorkers), finishedChan: make(chan batch, nWorkers)}
p2pt := p2ptest.NewTestP2P(t)
bfs := filesystem.NewEphemeralBlobStorage(t)
srv, err := NewService(ctx, su, bfs, cw, p2pt, &mockAssigner{},
dcs := filesystem.NewEphemeralDataColumnStorage(t)
srv, err := NewService(ctx, su, bfs, dcs, cw, p2pt, &mockAssigner{},
WithBatchSize(batchSize), WithWorkerCount(nWorkers), WithEnableBackfill(true), WithVerifierWaiter(&mockInitalizerWaiter{}))
require.NoError(t, err)
srv.ms = mockMinimumSlotter{min: primitives.Slot(high - batchSize*uint64(nBatches))}.minimumSlot
@@ -132,41 +132,3 @@ func TestBackfillMinSlotDefault(t *testing.T) {
require.Equal(t, specMin, s.ms(current))
})
}
func TestFuluOrigin(t *testing.T) {
cfg := params.BeaconConfig()
fuluEpoch := cfg.FuluForkEpoch
fuluSlot, err := slots.EpochStart(fuluEpoch)
require.NoError(t, err)
cases := []struct {
name string
origin primitives.Slot
isFulu bool
}{
{
name: "before fulu",
origin: fuluSlot - 1,
isFulu: false,
},
{
name: "at fulu",
origin: fuluSlot,
isFulu: true,
},
{
name: "after fulu",
origin: fuluSlot + 1,
isFulu: true,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
status := &dbval.BackfillStatus{
OriginSlot: uint64(tc.origin),
}
result := fuluOrigin(cfg, status)
require.Equal(t, tc.isFulu, result)
})
}
}

View File

@@ -74,7 +74,7 @@ func (s *Store) status() *dbval.BackfillStatus {
// fillBack saves the slice of blocks and updates the BackfillStatus LowSlot/Root/ParentRoot tracker to the values
// from the first block in the slice. This method assumes that the block slice has been fully validated and
// sorted in slot order by the calling function.
func (s *Store) fillBack(ctx context.Context, current primitives.Slot, blocks []blocks.ROBlock, store das.AvailabilityStore) (*dbval.BackfillStatus, error) {
func (s *Store) fillBack(ctx context.Context, current primitives.Slot, blocks []blocks.ROBlock, store das.AvailabilityChecker) (*dbval.BackfillStatus, error) {
status := s.status()
if len(blocks) == 0 {
return status, nil
@@ -88,10 +88,8 @@ func (s *Store) fillBack(ctx context.Context, current primitives.Slot, blocks []
status.LowParentRoot, highest.Root(), status.LowSlot, highest.Block().Slot())
}
for i := range blocks {
if err := store.IsDataAvailable(ctx, current, blocks[i]); err != nil {
return nil, err
}
if err := store.IsDataAvailable(ctx, current, blocks...); err != nil {
return nil, err
}
if err := s.store.SaveROBlocks(ctx, blocks, false); err != nil {

View File

@@ -5,7 +5,6 @@ import (
fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/interfaces"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/crypto/bls"
"github.com/OffchainLabs/prysm/v7/encoding/bytesutil"
@@ -14,26 +13,49 @@ import (
"github.com/pkg/errors"
)
var errInvalidBatchChain = errors.New("parent_root of block does not match the previous block's root")
var errProposerIndexTooHigh = errors.New("proposer index not present in origin state")
var errUnknownDomain = errors.New("runtime error looking up signing domain for fork")
var (
errInvalidBlocks = errors.New("block validation failure")
errInvalidBatchChain = errors.Wrap(errInvalidBlocks, "parent_root of block does not match the previous block's root")
errProposerIndexTooHigh = errors.Wrap(errInvalidBlocks, "proposer index not present in origin state")
errUnknownDomain = errors.Wrap(errInvalidBlocks, "runtime error looking up signing domain for fork")
errBatchSignatureFailed = errors.Wrap(errInvalidBlocks, "failed to verify block signature in batch")
errInvalidSignatureData = errors.Wrap(errInvalidBlocks, "could not verify signatures in block batch due to invalid signature data")
errEmptyVerificationSet = errors.New("no blocks to verify in batch")
)
// verifiedROBlocks represents a slice of blocks that have passed signature verification.
type verifiedROBlocks []blocks.ROBlock
func (v verifiedROBlocks) blobIdents(retentionStart primitives.Slot) ([]blobSummary, error) {
// early return if the newest block is outside the retention window
if len(v) > 0 && v[len(v)-1].Block().Slot() < retentionStart {
if len(v) == 0 {
return nil, nil
}
latest := v[len(v)-1].Block().Slot()
// early return if the newest block is outside the retention window
if latest < retentionStart {
return nil, nil
}
fuluStart := params.BeaconConfig().FuluForkEpoch
// Skip blob summaries entirely when the newest block in the batch is at or past the Fulu fork epoch.
if slots.ToEpoch(latest) >= fuluStart {
return nil, nil
}
bs := make([]blobSummary, 0)
for i := range v {
if v[i].Block().Slot() < retentionStart {
slot := v[i].Block().Slot()
if slot < retentionStart {
continue
}
if v[i].Block().Version() < version.Deneb {
continue
}
// Assuming blocks are sorted, as soon as we see one Fulu block we know the rest are also Fulu.
if slots.ToEpoch(slot) >= fuluStart {
return bs, nil
}
c, err := v[i].Block().Body().BlobKzgCommitments()
if err != nil {
return nil, errors.Wrapf(err, "unexpected error checking commitments for block root %#x", v[i].Root())
@@ -56,37 +78,37 @@ type verifier struct {
domain *domainCache
}
// TODO: rewrite this to use ROBlock.
func (vr verifier) verify(blks []interfaces.ReadOnlySignedBeaconBlock) (verifiedROBlocks, error) {
var err error
result := make([]blocks.ROBlock, len(blks))
func (vr verifier) verify(blks []blocks.ROBlock) (verifiedROBlocks, error) {
if len(blks) == 0 {
// Returning an error here simplifies handling in the caller.
// errEmptyVerificationSet should not cause the peer to be downscored.
return nil, errEmptyVerificationSet
}
sigSet := bls.NewSet()
for i := range blks {
result[i], err = blocks.NewROBlock(blks[i])
if err != nil {
return nil, err
}
if i > 0 && result[i-1].Root() != result[i].Block().ParentRoot() {
p, b := result[i-1], result[i]
if i > 0 && blks[i-1].Root() != blks[i].Block().ParentRoot() {
p, b := blks[i-1], blks[i]
return nil, errors.Wrapf(errInvalidBatchChain,
"slot %d parent_root=%#x, slot %d root=%#x",
b.Block().Slot(), b.Block().ParentRoot(),
p.Block().Slot(), p.Root())
}
set, err := vr.blockSignatureBatch(result[i])
set, err := vr.blockSignatureBatch(blks[i])
if err != nil {
return nil, err
return nil, errors.Wrap(err, "block signature batch")
}
sigSet.Join(set)
}
v, err := sigSet.Verify()
if err != nil {
return nil, errors.Wrap(err, "block signature verification error")
// The blst wrapper does not give us checkable errors, so we "reverse wrap"
// the error string to make it checkable for shouldDownscore.
return nil, errors.Wrap(errInvalidSignatureData, err.Error())
}
if !v {
return nil, errors.New("batch block signature verification failed")
return nil, errBatchSignatureFailed
}
return result, nil
return blks, nil
}
func (vr verifier) blockSignatureBatch(b blocks.ROBlock) (*bls.SignatureBatch, error) {

View File

@@ -0,0 +1,189 @@
package backfill
import (
"io"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
type columnBisector struct {
rootKeys map[[32]byte]rootKey
pidKeys map[peer.ID]pidKey
columnSource map[rootKey]map[uint64]pidKey
bisected map[pidKey][]blocks.RODataColumn
pidIter []peer.ID
current int
next int
downscore peerDownscorer
errs []error
failures map[rootKey]peerdas.ColumnIndices
}
type pidKey *peer.ID
type rootKey *[32]byte
var errColumnVerification = errors.New("column verification failed")
var errBisectInconsistent = errors.New("state of bisector inconsistent with columns to bisect")
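// addPeerColumns records pid as the source of each of the given columns, so that a later
// verification failure can be attributed to the peer that served them.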
func (c *columnBisector) addPeerColumns(pid peer.ID, columns ...blocks.RODataColumn) {
pk := c.peerIdKey(pid)
for _, col := range columns {
c.setColumnSource(c.rootKey(col.BlockRoot()), col.Index, pk)
}
}
// failuresFor returns the set of column indices that failed verification
// for the given block root.
func (c *columnBisector) failuresFor(root [32]byte) peerdas.ColumnIndices {
return c.failures[c.rootKey(root)]
}
func (c *columnBisector) failingRoots() [][32]byte {
roots := make([][32]byte, 0, len(c.failures))
for rk := range c.failures {
roots = append(roots, *rk)
}
return roots
}
func (c *columnBisector) setColumnSource(rk rootKey, idx uint64, pk pidKey) {
if c.columnSource == nil {
c.columnSource = make(map[rootKey]map[uint64]pidKey)
}
if c.columnSource[rk] == nil {
c.columnSource[rk] = make(map[uint64]pidKey)
}
c.columnSource[rk][idx] = pk
}
func (c *columnBisector) clearColumnSource(rk rootKey, idx uint64) {
if c.columnSource == nil {
return
}
if c.columnSource[rk] == nil {
return
}
delete(c.columnSource[rk], idx)
if len(c.columnSource[rk]) == 0 {
delete(c.columnSource, rk)
}
}
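// rootKey interns the given root, returning a stable pointer that is reused as a map key
// for subsequent lookups of the same root value; peerIdKey does the same for peer IDs.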
func (c *columnBisector) rootKey(root [32]byte) rootKey {
ptr, ok := c.rootKeys[root]
if ok {
return ptr
}
c.rootKeys[root] = &root
return c.rootKeys[root]
}
func (c *columnBisector) peerIdKey(pid peer.ID) pidKey {
ptr, ok := c.pidKeys[pid]
if ok {
return ptr
}
c.pidKeys[pid] = &pid
return c.pidKeys[pid]
}
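// peerFor returns the interned peer key of the peer that served the given column, or an
// error wrapping errBisectInconsistent if the column was never registered via addPeerColumns.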
func (c *columnBisector) peerFor(col blocks.RODataColumn) (pidKey, error) {
r := c.columnSource[c.rootKey(col.BlockRoot())]
if len(r) == 0 {
return nil, errors.Wrap(errBisectInconsistent, "root not tracked")
}
if ptr, ok := r[col.Index]; ok {
return ptr, nil
}
return nil, errors.Wrap(errBisectInconsistent, "index not tracked for root")
}
// reset prepares the columnBisector to be used to retry failed columns.
// It resets the peer sources of the failed columns and clears the failure records.
func (c *columnBisector) reset() {
// reset all column sources for failed columns
for rk, indices := range c.failures {
for _, idx := range indices.ToSlice() {
c.clearColumnSource(rk, idx)
}
}
c.failures = make(map[rootKey]peerdas.ColumnIndices)
c.errs = nil
}
// Bisect initializes columnBisector with the set of columns to bisect.
func (c *columnBisector) Bisect(columns []blocks.RODataColumn) (das.BisectionIterator, error) {
for _, col := range columns {
pid, err := c.peerFor(col)
if err != nil {
return nil, errors.Wrap(err, "could not lookup peer for column")
}
c.bisected[pid] = append(c.bisected[pid], col)
}
c.pidIter = make([]peer.ID, 0, len(c.bisected))
for pid := range c.bisected {
c.pidIter = append(c.pidIter, *pid)
}
// The implementation of Next() assumes these are equal in
// the base case.
c.current, c.next = 0, 0
return c, nil
}
// Next implements an iterator for the columnBisector.
// Each batch is from a single peer.
func (c *columnBisector) Next() ([]blocks.RODataColumn, error) {
if c.next >= len(c.pidIter) {
return nil, io.EOF
}
c.current = c.next
pid := c.pidIter[c.current]
cols := c.bisected[c.peerIdKey(pid)]
c.next += 1
return cols, nil
}
// Error implements das.Bisector.
func (c *columnBisector) Error() error {
if len(c.errs) > 0 {
return errColumnVerification
}
return nil
}
// OnError implements das.Bisector.
func (c *columnBisector) OnError(err error) {
c.errs = append(c.errs, err)
pid := c.pidIter[c.current]
c.downscore(pid, "column verification error", err)
// Track which roots failed by examining columns from the current peer
pk := c.peerIdKey(pid)
columns := c.bisected[pk]
for _, col := range columns {
root := col.BlockRoot()
rk := c.rootKey(root)
if c.failures[rk] == nil {
c.failures[rk] = make(peerdas.ColumnIndices)
}
c.failures[rk][col.Index] = struct{}{}
}
}
var _ das.Bisector = &columnBisector{}
var _ das.BisectionIterator = &columnBisector{}
func newColumnBisector(downscorer peerDownscorer) *columnBisector {
return &columnBisector{
rootKeys: make(map[[32]byte]rootKey),
pidKeys: make(map[peer.ID]pidKey),
columnSource: make(map[rootKey]map[uint64]pidKey),
bisected: make(map[pidKey][]blocks.RODataColumn),
failures: make(map[rootKey]peerdas.ColumnIndices),
downscore: downscorer,
}
}
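// bisectExample is an illustrative sketch (not part of the production flow) of the retry
// protocol implemented by columnBisector: after a whole-batch verification failure, the
// failed columns are re-verified in per-peer groups so the fault can be attributed to the
// peer that served them. The verifyFn parameter is a hypothetical stand-in for the real
// column verification step.
func bisectExample(cb *columnBisector, failed []blocks.RODataColumn, verifyFn func([]blocks.RODataColumn) error) error {
if _, err := cb.Bisect(failed); err != nil {
return err
}
for {
cols, err := cb.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return err
}
// Each group comes from a single peer; a failure downscores that peer and records
// the failing roots/indices so they can be retried from a different source.
if err := verifyFn(cols); err != nil {
cb.OnError(err)
}
}
// Error returns errColumnVerification if any per-peer group failed.
return cb.Error()
}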

View File

@@ -0,0 +1,569 @@
package backfill
import (
"io"
"testing"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/testing/require"
"github.com/OffchainLabs/prysm/v7/testing/util"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
// mockDownscorer is a simple downscorer that tracks calls
type mockDownscorer struct {
calls []struct {
pid peer.ID
msg string
err error
}
}
func (m *mockDownscorer) downscoreCall(pid peer.ID, msg string, err error) {
m.calls = append(m.calls, struct {
pid peer.ID
msg string
err error
}{pid, msg, err})
}
// createTestDataColumn creates a test data column with the given parameters.
// nBlobs determines the number of cells, commitments, and proofs.
func createTestDataColumn(t *testing.T, root [32]byte, index uint64, nBlobs int) util.DataColumnParam {
commitments := make([][]byte, nBlobs)
cells := make([][]byte, nBlobs)
proofs := make([][]byte, nBlobs)
for i := 0; i < nBlobs; i++ {
commitments[i] = make([]byte, 48)
cells[i] = make([]byte, 0)
proofs[i] = make([]byte, 48)
}
return util.DataColumnParam{
Index: index,
Column: cells,
KzgCommitments: commitments,
KzgProofs: proofs,
Slot: primitives.Slot(1),
BodyRoot: root[:],
StateRoot: make([]byte, 32),
ParentRoot: make([]byte, 32),
}
}
// createTestPeerID creates a test peer ID from a string seed.
func createTestPeerID(t *testing.T, seed string) peer.ID {
pid, err := peer.Decode(seed)
require.NoError(t, err)
return pid
}
// TestNewColumnBisector verifies basic initialization
func TestNewColumnBisector(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
require.NotNil(t, cb)
require.NotNil(t, cb.rootKeys)
require.NotNil(t, cb.pidKeys)
require.NotNil(t, cb.columnSource)
require.NotNil(t, cb.bisected)
require.Equal(t, 0, cb.current)
require.Equal(t, 0, cb.next)
}
// TestAddAndIterateColumns demonstrates creating test columns and iterating
func TestAddAndIterateColumns(t *testing.T) {
root := [32]byte{1, 0, 0}
params := []util.DataColumnParam{
createTestDataColumn(t, root, 0, 2),
createTestDataColumn(t, root, 1, 2),
}
roColumns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params)
require.Equal(t, 2, len(roColumns))
// Create downscorer and bisector
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
// Create test peer ID
pid1 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
// Add columns from peer
cb.addPeerColumns(pid1, roColumns...)
// Bisect and verify iteration
iter, err := cb.Bisect(roColumns)
require.NoError(t, err)
require.NotNil(t, iter)
// Get first (and only) batch from the peer
batch, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 2, len(batch))
// Next should return EOF
_, err = iter.Next()
require.Equal(t, io.EOF, err)
}
// TestRootKeyDeduplication verifies that rootKey returns the same pointer for identical roots
func TestRootKeyDeduplication(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 2, 3}
key1 := cb.rootKey(root)
key2 := cb.rootKey(root)
// Should be the same pointer
require.Equal(t, key1, key2)
}
// TestPeerIdKeyDeduplication verifies that peerIdKey returns the same pointer for identical peer IDs
func TestPeerIdKeyDeduplication(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
key1 := cb.peerIdKey(pid)
key2 := cb.peerIdKey(pid)
// Should be the same pointer
require.Equal(t, key1, key2)
}
// TestMultipleRootsAndPeers verifies handling of multiple distinct roots and peer IDs
func TestMultipleRootsAndPeers(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root1 := [32]byte{1, 0, 0}
root2 := [32]byte{2, 0, 0}
root3 := [32]byte{3, 0, 0}
pid1 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pid2 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMr")
// Register multiple columns with different roots and peers
params1 := createTestDataColumn(t, root1, 0, 2)
params2 := createTestDataColumn(t, root2, 1, 2)
params3 := createTestDataColumn(t, root3, 2, 2)
cols1, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params1})
cols2, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params2})
cols3, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params3})
cb.addPeerColumns(pid1, cols1...)
cb.addPeerColumns(pid2, cols2...)
cb.addPeerColumns(pid1, cols3...)
// Verify roots and peers are tracked
require.Equal(t, 3, len(cb.rootKeys))
require.Equal(t, 2, len(cb.pidKeys))
}
// TestSetColumnSource verifies that columns from different peers are properly tracked
func TestSetColumnSource(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
// Create multiple peers with columns
root1 := [32]byte{1, 0, 0}
root2 := [32]byte{2, 0, 0}
root3 := [32]byte{3, 0, 0}
pid1 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pid2 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMr")
// Create columns for peer1: 2 columns
params1 := []util.DataColumnParam{
createTestDataColumn(t, root1, 0, 1),
createTestDataColumn(t, root2, 1, 1),
}
// Create columns for peer2: 2 columns
params2 := []util.DataColumnParam{
createTestDataColumn(t, root3, 0, 1),
createTestDataColumn(t, root1, 2, 1),
}
cols1, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params1)
cols2, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params2)
// Register columns from both peers
cb.addPeerColumns(pid1, cols1...)
cb.addPeerColumns(pid2, cols2...)
// Use Bisect to verify columns are grouped by peer
allCols := append(cols1, cols2...)
iter, err := cb.Bisect(allCols)
require.NoError(t, err)
// Verify peer1's columns
batch1, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 2, len(batch1))
// Verify each column maps to the correct peer using peerFor
for _, col := range batch1 {
colPeer, err := cb.peerFor(col)
require.NoError(t, err)
require.Equal(t, cb.peerIdKey(pid1), colPeer)
}
// Verify peer2's columns
batch2, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 2, len(batch2))
// Verify each column maps to the correct peer
for _, col := range batch2 {
colPeer, err := cb.peerFor(col)
require.NoError(t, err)
require.Equal(t, cb.peerIdKey(pid2), colPeer)
}
// Verify we've consumed all batches
_, err = iter.Next()
require.Equal(t, io.EOF, err)
}
// TestClearColumnSource verifies column removal and cleanup of empty maps
func TestClearColumnSource(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
rk := cb.rootKey(root)
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pk := cb.peerIdKey(pid)
cb.setColumnSource(rk, 0, pk)
cb.setColumnSource(rk, 1, pk)
require.Equal(t, 2, len(cb.columnSource[rk]))
// Clear one column
cb.clearColumnSource(rk, 0)
require.Equal(t, 1, len(cb.columnSource[rk]))
// Clear the last column - should remove the root entry
cb.clearColumnSource(rk, 1)
_, exists := cb.columnSource[rk]
require.Equal(t, false, exists)
}
// TestClearNonexistentColumn ensures clearing non-existent columns doesn't crash
func TestClearNonexistentColumn(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
rk := cb.rootKey(root)
// Should not panic
cb.clearColumnSource(rk, 99)
}
// TestFailuresFor verifies failuresFor returns correct failures for a block root
func TestFailuresFor(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
rk := cb.rootKey(root)
// Initially no failures
failures := cb.failuresFor(root)
require.Equal(t, 0, len(failures.ToSlice()))
// Set some failures
cb.failures[rk] = peerdas.ColumnIndices{0: struct{}{}, 1: struct{}{}, 2: struct{}{}}
failures = cb.failuresFor(root)
require.Equal(t, 3, len(failures.ToSlice()))
}
// TestFailingRoots ensures failingRoots returns all roots with failures
func TestFailingRoots(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root1 := [32]byte{1, 0, 0}
root2 := [32]byte{2, 0, 0}
rk1 := cb.rootKey(root1)
rk2 := cb.rootKey(root2)
cb.failures[rk1] = peerdas.ColumnIndices{0: struct{}{}}
cb.failures[rk2] = peerdas.ColumnIndices{1: struct{}{}}
failingRoots := cb.failingRoots()
require.Equal(t, 2, len(failingRoots))
}
// TestPeerFor verifies peerFor correctly returns the peer for a column
func TestPeerFor(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
params := createTestDataColumn(t, root, 0, 2)
cols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params})
// Use addPeerColumns to properly register the column
cb.addPeerColumns(pid, cols[0])
peerKey, err := cb.peerFor(cols[0])
require.NoError(t, err)
require.NotNil(t, peerKey)
}
// TestPeerForNotTracked ensures error when root not tracked
func TestPeerForNotTracked(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
params := createTestDataColumn(t, root, 0, 2)
cols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params})
// Don't add any columns - root is not tracked
_, err := cb.peerFor(cols[0])
require.ErrorIs(t, err, errBisectInconsistent)
}
// TestBisectGroupsByMultiplePeers ensures columns grouped by their peer source
func TestBisectGroupsByMultiplePeers(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
pid1 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pid2 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMr")
params1 := createTestDataColumn(t, root, 0, 2)
params2 := createTestDataColumn(t, root, 1, 2)
cols1, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params1})
cols2, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params2})
cb.addPeerColumns(pid1, cols1...)
cb.addPeerColumns(pid2, cols2...)
// Bisect both columns
iter, err := cb.Bisect(append(cols1, cols2...))
require.NoError(t, err)
// Should get two separate batches, one from each peer
batch1, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 1, len(batch1))
batch2, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 1, len(batch2))
_, err = iter.Next()
require.Equal(t, io.EOF, err)
}
// TestOnError verifies OnError records errors and calls downscorer
func TestOnError(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
cb.pidIter = append(cb.pidIter, pid)
cb.current = 0
testErr := errors.New("test error")
cb.OnError(testErr)
require.Equal(t, 1, len(cb.errs))
require.Equal(t, 1, len(downscorer.calls))
require.Equal(t, pid, downscorer.calls[0].pid)
}
// TestErrorReturnAfterOnError ensures Error() returns non-nil after OnError called
func TestErrorReturnAfterOnError(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
cb.pidIter = append(cb.pidIter, pid)
cb.current = 0
require.NoError(t, cb.Error())
cb.OnError(errors.New("test error"))
require.NotNil(t, cb.Error())
}
// TestResetClearsFailures verifies reset clears all failures and errors
func TestResetClearsFailures(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
rk := cb.rootKey(root)
cb.failures[rk] = peerdas.ColumnIndices{0: struct{}{}, 1: struct{}{}}
cb.errs = []error{errors.New("test")}
cb.reset()
require.Equal(t, 0, len(cb.failures))
require.Equal(t, 0, len(cb.errs))
}
// TestResetClearsColumnSources ensures reset clears column sources for failed columns
func TestResetClearsColumnSources(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
root := [32]byte{1, 0, 0}
rk := cb.rootKey(root)
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pk := cb.peerIdKey(pid)
cb.setColumnSource(rk, 0, pk)
cb.setColumnSource(rk, 1, pk)
cb.failures[rk] = peerdas.ColumnIndices{0: struct{}{}, 1: struct{}{}}
cb.reset()
// Column sources for the failed root should be cleared
_, exists := cb.columnSource[rk]
require.Equal(t, false, exists)
}
// TestBisectResetBisectAgain tests end-to-end multiple bisect cycles with reset
func TestBisectResetBisectAgain(t *testing.T) {
downscorer := &mockDownscorer{}
root := [32]byte{1, 0, 0}
pid := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
params := createTestDataColumn(t, root, 0, 2)
cols, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, []util.DataColumnParam{params})
// First bisect with fresh bisector
cb1 := newColumnBisector(downscorer.downscoreCall)
cb1.addPeerColumns(pid, cols...)
iter, err := cb1.Bisect(cols)
require.NoError(t, err)
batch, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 1, len(batch))
_, err = iter.Next()
require.Equal(t, io.EOF, err)
// Second bisect with a new bisector (simulating retry with reset)
cb2 := newColumnBisector(downscorer.downscoreCall)
cb2.addPeerColumns(pid, cols...)
iter, err = cb2.Bisect(cols)
require.NoError(t, err)
batch, err = iter.Next()
require.NoError(t, err)
require.Equal(t, 1, len(batch))
}
// TestBisectEmptyColumns tests Bisect with empty column list
func TestBisectEmptyColumns(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
var emptyColumns []util.DataColumnParam
roColumns, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, emptyColumns)
iter, err := cb.Bisect(roColumns)
// This should not error with empty columns
if err == nil {
_, err := iter.Next()
require.Equal(t, io.EOF, err)
}
}
// TestCompleteFailureFlow tests marking a peer as failed and tracking failure roots
func TestCompleteFailureFlow(t *testing.T) {
downscorer := &mockDownscorer{}
cb := newColumnBisector(downscorer.downscoreCall)
// Create multiple roots with columns from different peers
root1 := [32]byte{1, 0, 0}
root2 := [32]byte{2, 0, 0}
root3 := [32]byte{3, 0, 0}
pid1 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMq")
pid2 := createTestPeerID(t, "QmYyQSo1c1Ym7orWxLYvCrM2EmxFTSc34pP8r3hidQPQMr")
// Create columns: pid1 provides columns for root1 and root2, pid2 provides for root3
params1 := []util.DataColumnParam{
createTestDataColumn(t, root1, 0, 2),
createTestDataColumn(t, root2, 1, 2),
}
params2 := []util.DataColumnParam{
createTestDataColumn(t, root3, 2, 2),
}
cols1, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params1)
cols2, _ := util.CreateTestVerifiedRoDataColumnSidecars(t, params2)
// Register columns from both peers
cb.addPeerColumns(pid1, cols1...)
cb.addPeerColumns(pid2, cols2...)
// Bisect all columns
allCols := append(cols1, cols2...)
iter, err := cb.Bisect(allCols)
require.NoError(t, err)
// Get first batch from pid1
batch1, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 2, len(batch1))
// Mark pid1 as failed
// First, manually extract the roots from batch1 to ensure we can track them
rootsInBatch1 := make(map[[32]byte]bool)
for _, col := range batch1 {
rootsInBatch1[col.BlockRoot()] = true
}
cb.OnError(errors.New("pid1 verification failed"))
// Verify downscorer was called for pid1
require.Equal(t, 1, len(downscorer.calls))
require.Equal(t, pid1, downscorer.calls[0].pid)
// Verify that failures contains the roots from batch1
require.Equal(t, len(rootsInBatch1), len(cb.failingRoots()))
// Get remaining batches until EOF
batch2, err := iter.Next()
require.NoError(t, err)
require.Equal(t, 1, len(batch2))
_, err = iter.Next()
require.Equal(t, io.EOF, err)
// Verify failingRoots matches the roots from the failed peer (pid1)
failingRoots := cb.failingRoots()
require.Equal(t, len(rootsInBatch1), len(failingRoots))
// Verify the failing roots are exactly the ones from batch1
failingRootsMap := make(map[[32]byte]bool)
for _, root := range failingRoots {
failingRootsMap[root] = true
}
for root := range rootsInBatch1 {
require.Equal(t, true, failingRootsMap[root])
}
}

View File

@@ -8,7 +8,6 @@ import (
fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
"github.com/OffchainLabs/prysm/v7/config/params"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/consensus-types/interfaces"
"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v7/crypto/bls"
"github.com/OffchainLabs/prysm/v7/encoding/bytesutil"
@@ -70,12 +69,7 @@ func TestVerify(t *testing.T) {
}
v, err := newBackfillVerifier(vr, pubkeys)
require.NoError(t, err)
notrob := make([]interfaces.ReadOnlySignedBeaconBlock, len(blks))
// We have to unwrap the ROBlocks for this code because that's what it expects (for now).
for i := range blks {
notrob[i] = blks[i].ReadOnlySignedBeaconBlock
}
vbs, err := v.verify(notrob)
vbs, err := v.verify(blks)
require.NoError(t, err)
require.Equal(t, len(blks), len(vbs))
}

View File

@@ -9,9 +9,56 @@ import (
"github.com/OffchainLabs/prysm/v7/beacon-chain/startup"
"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
"github.com/OffchainLabs/prysm/v7/beacon-chain/verification"
"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v7/encoding/bytesutil"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
var errInvalidBatchState = errors.New("invalid batch state")
type peerDownscorer func(peer.ID, string, error)
type workerCfg struct {
clock *startup.Clock
verifier *verifier
ctxMap sync.ContextByteVersions
newVB verification.NewBlobVerifier
newVC verification.NewDataColumnsVerifier
blobStore *filesystem.BlobStorage
colStore *filesystem.DataColumnStorage
downscore peerDownscorer
}
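// initWorkerCfg completes cfg once the verification initializer and origin state are available,
// populating the block signature verifier, the context byte map, and the blob/data column
// sidecar verifier constructors.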
func initWorkerCfg(ctx context.Context, cfg *workerCfg, vw InitializerWaiter, store *Store) (*workerCfg, error) {
vi, err := vw.WaitForInitializer(ctx)
if err != nil {
return nil, err
}
cps, err := store.originState(ctx)
if err != nil {
return nil, err
}
keys, err := cps.PublicKeys()
if err != nil {
return nil, errors.Wrap(err, "unable to retrieve public keys for all validators in the origin state")
}
vr := cps.GenesisValidatorsRoot()
cm, err := sync.ContextByteVersionsForValRoot(bytesutil.ToBytes32(vr))
if err != nil {
return nil, errors.Wrapf(err, "unable to initialize context version map using genesis validator root %#x", vr)
}
v, err := newBackfillVerifier(vr, keys)
if err != nil {
return nil, errors.Wrapf(err, "newBackfillVerifier failed")
}
cfg.verifier = v
cfg.ctxMap = cm
cfg.newVB = newBlobVerifierFromInitializer(vi)
cfg.newVC = newDataColumnVerifierFromInitializer(vi)
return cfg, nil
}
type workerId int
type p2pWorker struct {
@@ -19,23 +66,46 @@ type p2pWorker struct {
todo chan batch
done chan batch
p2p p2p.P2P
v *verifier
c *startup.Clock
cm sync.ContextByteVersions
nbv verification.NewBlobVerifier
bfs *filesystem.BlobStorage
cfg *workerCfg
}
func newP2pWorker(id workerId, p p2p.P2P, todo, done chan batch, cfg *workerCfg) *p2pWorker {
return &p2pWorker{
id: id,
todo: todo,
done: done,
p2p: p,
cfg: cfg,
}
}
func (w *p2pWorker) run(ctx context.Context) {
for {
select {
case b := <-w.todo:
log.WithFields(b.logFields()).WithField("backfillWorker", w.id).Debug("Backfill worker received batch")
if b.state == batchBlobSync {
w.done <- w.handleBlobs(ctx, b)
} else {
w.done <- w.handleBlocks(ctx, b)
if err := b.waitUntilReady(ctx); err != nil {
log.WithField("batchId", b.id()).WithError(ctx.Err()).Info("Worker context canceled while waiting to retry")
continue
}
log.WithFields(b.logFields()).WithField("backfillWorker", w.id).Trace("Backfill worker received batch")
switch b.state {
case batchSequenced:
b = w.handleBlocks(ctx, b)
case batchSyncBlobs:
b = w.handleBlobs(ctx, b)
case batchSyncColumns:
b = w.handleColumns(ctx, b)
case batchImportable:
// This state indicates the batch made it all the way to import and then failed,
// so we need to clear out the blocks and go back to the start of the process.
b.blocks = nil
b = w.handleBlocks(ctx, b)
default:
// A batch in an unknown state represents an implementation error,
// so we treat it as a fatal error, meaning the worker pool should shut down.
b = b.withFatalError(errors.Wrap(errInvalidBatchState, b.state.String()))
}
w.done <- b
case <-ctx.Done():
log.WithField("backfillWorker", w.id).Info("Backfill worker exiting after context canceled")
return
@@ -43,74 +113,146 @@ func (w *p2pWorker) run(ctx context.Context) {
}
}
func (w *p2pWorker) handleBlocks(ctx context.Context, b batch) batch {
cs := w.c.CurrentSlot()
blobRetentionStart, err := sync.BlobRPCMinValidSlot(cs)
if err != nil {
return b.withRetryableError(errors.Wrap(err, "configuration issue, could not compute minimum blob retention slot"))
func resetRetryableColumns(b batch) batch {
// Return the given batch as-is if it isn't in a state that this function should handle.
if b.columns == nil || b.columns.bisector == nil || len(b.columns.bisector.errs) == 0 {
return b
}
b.blockPid = b.busy
bisector := b.columns.bisector
roots := bisector.failingRoots()
if len(roots) == 0 {
return b
}
// Add all the failed columns back to the toDownload structure.
for _, root := range roots {
bc := b.columns.toDownload[root]
bc.remaining.Union(bisector.failuresFor(root))
}
b.columns.bisector.reset()
return b
}
func (w *p2pWorker) handleBlocks(ctx context.Context, b batch) batch {
current := w.cfg.clock.CurrentSlot()
b.blockPeer = b.assignedPeer
start := time.Now()
results, err := sync.SendBeaconBlocksByRangeRequest(ctx, w.c, w.p2p, b.blockPid, b.blockRequest(), blockValidationMetrics)
dlt := time.Now()
backfillBatchTimeDownloadingBlocks.Observe(float64(dlt.Sub(start).Milliseconds()))
results, err := sync.SendBeaconBlocksByRangeRequest(ctx, w.cfg.clock, w.p2p, b.blockPeer, b.blockRequest(), blockValidationMetrics)
if err != nil {
log.WithError(err).WithFields(b.logFields()).Debug("Batch requesting failed")
return b.withRetryableError(err)
}
vb, err := w.v.verify(results)
backfillBatchTimeVerifying.Observe(float64(time.Since(dlt).Milliseconds()))
dlt := time.Now()
blockDownloadMs.Observe(float64(dlt.Sub(start).Milliseconds()))
toVerify, err := blocks.NewROBlockSlice(results)
if err != nil {
log.WithError(err).WithFields(b.logFields()).Debug("Batch conversion to ROBlock failed")
return b.withRetryableError(err)
}
verified, err := w.cfg.verifier.verify(toVerify)
blockVerifyMs.Observe(float64(time.Since(dlt).Milliseconds()))
if err != nil {
if shouldDownscore(err) {
w.cfg.downscore(b.blockPeer, "invalid SignedBeaconBlock batch rpc response", err)
}
log.WithError(err).WithFields(b.logFields()).Debug("Batch validation failed")
return b.withRetryableError(err)
}
// This is a hack to get the rough size of the batch. This helps us approximate the amount of memory needed
// to hold batches and relative sizes between batches, but will be inaccurate when it comes to measuring actual
// bytes downloaded from peers, mainly because the p2p messages are snappy compressed.
bdl := 0
for i := range vb {
bdl += vb[i].SizeSSZ()
for i := range verified {
bdl += verified[i].SizeSSZ()
}
backfillBlocksApproximateBytes.Add(float64(bdl))
blockDownloadBytesApprox.Add(float64(bdl))
log.WithFields(b.logFields()).WithField("dlbytes", bdl).Debug("Backfill batch block bytes downloaded")
bs, err := newBlobSync(cs, vb, &blobSyncConfig{retentionStart: blobRetentionStart, nbv: w.nbv, store: w.bfs})
b.blocks = verified
blobRetentionStart, err := sync.BlobRPCMinValidSlot(current)
if err != nil {
return b.withRetryableError(errors.Wrap(err, "configuration issue, could not compute minimum blob retention slot"))
}
bscfg := &blobSyncConfig{retentionStart: blobRetentionStart, nbv: w.cfg.newVB, store: w.cfg.blobStore}
bs, err := newBlobSync(current, verified, bscfg)
if err != nil {
return b.withRetryableError(err)
}
return b.withResults(vb, bs)
cs, err := newColumnSync(ctx, b, verified, current, w.p2p, verified, w.cfg)
if err != nil {
return b.withRetryableError(err)
}
b.blobs = bs
b.columns = cs
return b.transitionToNext()
}
func (w *p2pWorker) handleBlobs(ctx context.Context, b batch) batch {
b.blobPid = b.busy
b.blobs.peer = b.assignedPeer
start := time.Now()
// We don't need the response for anything other than metrics, because the validation callback
// (b.blobs.validateNext) adds each sidecar to a batch AvailabilityStore once it is checked.
blobs, err := sync.SendBlobsByRangeRequest(ctx, w.c, w.p2p, b.blobPid, w.cm, b.blobRequest(), b.blobResponseValidator(), blobValidationMetrics)
blobs, err := sync.SendBlobsByRangeRequest(ctx, w.cfg.clock, w.p2p, b.blobs.peer, w.cfg.ctxMap, b.blobRequest(), b.blobs.validateNext, blobValidationMetrics)
if err != nil {
b.bs = nil
b.blobs = nil
return b.withRetryableError(err)
}
dlt := time.Now()
backfillBatchTimeDownloadingBlobs.Observe(float64(dlt.Sub(start).Milliseconds()))
blobSidecarDownloadMs.Observe(float64(dlt.Sub(start).Milliseconds()))
if len(blobs) > 0 {
// All blobs are the same size, so we can compute 1 and use it for all in the batch.
sz := blobs[0].SizeSSZ() * len(blobs)
backfillBlobsApproximateBytes.Add(float64(sz))
blobSidecarDownloadBytesApprox.Add(float64(sz))
log.WithFields(b.logFields()).WithField("dlbytes", sz).Debug("Backfill batch blob bytes downloaded")
}
return b.postBlobSync()
if b.blobs.needed() > 0 {
// If we are missing blobs after processing the blob step, this is an error and we need to scrap the batch and start over.
b.blobs = nil
b.blocks = []blocks.ROBlock{}
return b.withRetryableError(errors.New("missing blobs after blob download"))
}
return b.transitionToNext()
}
func newP2pWorker(id workerId, p p2p.P2P, todo, done chan batch, c *startup.Clock, v *verifier, cm sync.ContextByteVersions, nbv verification.NewBlobVerifier, bfs *filesystem.BlobStorage) *p2pWorker {
return &p2pWorker{
id: id,
todo: todo,
done: done,
p2p: p,
v: v,
c: c,
cm: cm,
nbv: nbv,
bfs: bfs,
func (w *p2pWorker) handleColumns(ctx context.Context, b batch) batch {
start := time.Now()
b.columns.peer = b.assignedPeer
// The bisector keeps track of the peer that provided each column, for scoring purposes.
// When verification of a batch of columns fails, the bisector retries verification with batches
// grouped by peer, to figure out whether the failure is due to a specific peer.
vr := b.validatingColumnRequest(b.columns.bisector)
p := sync.DataColumnSidecarsParams{
Ctx: ctx,
Tor: w.cfg.clock,
P2P: w.p2p,
CtxMap: w.cfg.ctxMap,
// DownscorePeerOnRPCFault is very aggressive and is only used for fetching origin blobs during startup.
DownscorePeerOnRPCFault: false,
// SendDataColumnSidecarsByRangeRequest uses the DataColumnSidecarsParams param struct to cover
// multiple different use cases. Some of them have different required fields. The following fields are
// not used in the methods that backfill invokes. SendDataColumnSidecarsByRangeRequest should be refactored
// to only require the minimum set of parameters.
//RateLimiter *leakybucket.Collector
//Storage: w.cfg.cfs,
//NewVerifier: vr.validate,
}
// The return is dropped because the validation code adds the columns
// to the columnSync AvailabilityStore under the hood.
_, err := sync.SendDataColumnSidecarsByRangeRequest(p, b.columns.peer, vr.req, vr.validate)
if err != nil {
if shouldDownscore(err) {
w.cfg.downscore(b.columns.peer, "invalid DataColumnSidecar rpc response", err)
}
return b.withRetryableError(errors.Wrap(err, "failed to request data column sidecars"))
}
dataColumnSidecarDownloadMs.Observe(float64(time.Since(start).Milliseconds()))
return b.transitionToNext()
}
func shouldDownscore(err error) bool {
return errors.Is(err, errInvalidDataColumnResponse) ||
errors.Is(err, sync.ErrInvalidFetchedData) ||
errors.Is(err, errInvalidBlocks)
}
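// downscoreExample is an illustrative sketch (not used by production code) showing how the
// sentinel-wrapping pattern in verify.go lets a single errors.Is check cover the whole family
// of block validation failures without penalizing peers for transient faults.
func downscoreExample() (bool, bool) {
peerFault := errors.Wrap(errInvalidBatchChain, "slot 5 parent_root mismatch")
transient := errors.New("i/o timeout")
// true: errInvalidBatchChain wraps errInvalidBlocks, so the serving peer is downscored.
// false: a transient network fault does not downscore the peer.
return shouldDownscore(peerFault), shouldDownscore(transient)
}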

View File

@@ -0,0 +1,240 @@
package sync
import (
"cmp"
"math"
"slices"
"time"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p"
"github.com/ethereum/go-ethereum/p2p/enode"
"github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors"
)
// DASPeerCache caches per-peer information used to make DAS peering decisions.
type DASPeerCache struct {
p2pSvc p2p.P2P
peers map[peer.ID]*dasPeer
}
// dasPeer represents a peer's custody of columns and their coverage score.
type dasPeer struct {
pid peer.ID
enid enode.ID
custodied peerdas.ColumnIndices
lastAssigned time.Time
}
// dasPeerScore is used to build a slice of peer+score pairs for ranking purposes.
type dasPeerScore struct {
peer *dasPeer
score float64
}
// PeerPicker is a structure that maps out the intersection of peer custody and column indices
// to weight each peer based on the scarcity of the columns they custody. This allows us to prioritize
// requests for more scarce columns to peers that custody them, so that we don't waste our bandwidth allocation
// making requests for more common columns from peers that can provide the more scarce columns.
type PeerPicker struct {
scores []*dasPeerScore // scores is a set of generic scores, based on the full custody group set
ranker *rarityRanker
custodians map[uint64][]*dasPeer
toCustody peerdas.ColumnIndices // full set of columns this node will try to custody
reqInterval time.Duration
}
// NewDASPeerCache initializes a DASPeerCache. This type is not currently thread safe.
func NewDASPeerCache(p2pSvc p2p.P2P) *DASPeerCache {
return &DASPeerCache{
peers: make(map[peer.ID]*dasPeer),
p2pSvc: p2pSvc,
}
}
// NewPicker builds a PeerPicker based on the current view of columns custodied
// by the given set of peers. New PeerPickers should be created somewhat frequently, as the status of peers can
// change, including the set of columns each peer custodies.
// reqInterval sets the frequency that a peer can be picked in terms of time. A peer can be picked once per reqInterval,
// eg a value of time.Second would allow 1 request per second to the peer, or a value of 500 * time.Millisecond would allow
// 2 req/sec.
func (c *DASPeerCache) NewPicker(pids []peer.ID, toCustody peerdas.ColumnIndices, reqInterval time.Duration) (*PeerPicker, error) {
// For each of the given peers, refresh the cache's view of their currently custodied columns.
// Also populate 'custodians', which stores the set of peers that custody each column index.
custodians := make(map[uint64][]*dasPeer, len(toCustody))
scores := make([]*dasPeerScore, 0, len(pids))
for _, pid := range pids {
peer, err := c.refresh(pid, toCustody)
if err != nil {
log.WithField("peerID", pid).WithError(err).Debug("Failed to convert peer ID to node ID.")
continue
}
for col := range peer.custodied {
if toCustody.Has(col) {
custodians[col] = append(custodians[col], peer)
}
}
// set score to math.MaxFloat64 so we can tell that it hasn't been initialized
scores = append(scores, &dasPeerScore{peer: peer, score: math.MaxFloat64})
}
return &PeerPicker{
toCustody: toCustody,
ranker: newRarityRanker(toCustody, custodians),
custodians: custodians,
scores: scores,
reqInterval: reqInterval,
}, nil
}
// refresh supports NewPicker in getting the latest dasPeer view for the given peer.ID. It caches the result
// of the enode.ID computation but refreshes the custody group count each time it is called, leveraging the
// cache behind peerdas.Info.
func (c *DASPeerCache) refresh(pid peer.ID, toCustody peerdas.ColumnIndices) (*dasPeer, error) {
// Computing the enode.ID seems to involve multiple parsing and validation steps followed by a
// hash computation, so it seems worth trying to cache the result.
p, ok := c.peers[pid]
if !ok {
nodeID, err := p2p.ConvertPeerIDToNodeID(pid)
if err != nil {
// If we can't convert the peer ID to a node ID, remove peer from the cache.
delete(c.peers, pid)
return nil, err
}
p = &dasPeer{enid: nodeID, pid: pid}
}
if len(toCustody) > 0 {
dasInfo, _, err := peerdas.Info(p.enid, c.p2pSvc.CustodyGroupCountFromPeer(pid))
if err != nil {
// If we can't get the peerDAS info, remove peer from the cache.
delete(c.peers, pid)
return nil, errors.Wrapf(err, "CustodyGroupCountFromPeer, peerID=%s, nodeID=%s", pid, p.enid)
}
p.custodied = peerdas.NewColumnIndicesFromMap(dasInfo.CustodyColumns)
} else {
p.custodied = peerdas.NewColumnIndices()
}
c.peers[pid] = p
return p, nil
}
// ForColumns returns the best peer to request columns from, based on the scarcity of the columns needed.
func (m *PeerPicker) ForColumns(needed peerdas.ColumnIndices, busy map[peer.ID]bool) (peer.ID, []uint64, error) {
// - find the custodied column with the lowest frequency
// - collect all the peers that have custody of that column
// - score the peers by the rarity of the needed columns they offer
var best *dasPeer
bestScore, bestCoverage := 0.0, []uint64{}
for _, col := range m.ranker.ascendingRarity(needed) {
for _, p := range m.custodians[col] {
// enforce a minimum interval between requests to the same peer
if p.lastAssigned.Add(m.reqInterval).After(time.Now()) {
continue
}
if busy[p.pid] {
continue
}
covered := p.custodied.Intersection(needed)
if len(covered) == 0 {
continue
}
// update best if any of the following:
// - current score better than previous best
// - scores are tied, and current coverage is better than best
// - scores are tied, coverage equal, pick the least-recently used peer
score := m.ranker.score(covered)
if score < bestScore {
continue
}
if score == bestScore && best != nil {
if len(covered) < len(bestCoverage) {
continue
}
if len(covered) == len(bestCoverage) && best.lastAssigned.Before(p.lastAssigned) {
continue
}
}
best, bestScore, bestCoverage = p, score, covered.ToSlice()
}
if best != nil {
best.lastAssigned = time.Now()
slices.Sort(bestCoverage)
return best.pid, bestCoverage, nil
}
}
return "", nil, errors.New("no peers able to cover needed columns")
}
// ForBlocks returns the lowest scoring peer in the set. This can be used to pick a peer
// for block requests, preserving the peers that have the highest coverage scores
// for column requests.
func (m *PeerPicker) ForBlocks(busy map[peer.ID]bool) (peer.ID, error) {
slices.SortFunc(m.scores, func(a, b *dasPeerScore) int {
// MaxFloat64 is used as a sentinel value for an uninitialized score;
// check and set scores while sorting for uber-lazy initialization.
if a.score == math.MaxFloat64 {
a.score = m.ranker.score(a.peer.custodied.Intersection(m.toCustody))
}
if b.score == math.MaxFloat64 {
b.score = m.ranker.score(b.peer.custodied.Intersection(m.toCustody))
}
return cmp.Compare(a.score, b.score)
})
for _, ds := range m.scores {
if !busy[ds.peer.pid] {
return ds.peer.pid, nil
}
}
return "", errors.New("no peers available")
}
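// pickerExample is an illustrative sketch of the intended workflow (the pids, needed and busy
// values are assumed to come from the caller's sync state): build a picker from the current
// peer set, ask it for the best peer to serve a set of needed columns, and use the
// lowest-scoring peer for plain block requests so high-coverage peers stay available for
// column requests.
func pickerExample(cache *DASPeerCache, pids []peer.ID, toCustody, needed peerdas.ColumnIndices, busy map[peer.ID]bool) error {
picker, err := cache.NewPicker(pids, toCustody, time.Second)
if err != nil {
return err
}
colPeer, cols, err := picker.ForColumns(needed, busy)
if err != nil {
return err
}
// cols is the subset of needed columns that colPeer custodies, sorted ascending;
// it would be used to build a DataColumnSidecarsByRange request to colPeer.
_, _ = colPeer, cols
blockPeer, err := picker.ForBlocks(busy)
if err != nil {
return err
}
_ = blockPeer
return nil
}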
// rarityRanker is initialized with the set of columns this node needs to custody, and the set of
// all peer custody columns. With that information it is able to compute a numeric representation of
// column rarity, and use that number to give each peer a score that represents how fungible their
// bandwidth likely is relative to other peers given a more specific set of needed columns.
type rarityRanker struct {
// rarity maps column indices to their rarity scores.
// The rarity score is defined as the inverse of the number of custodians: 1/custodians.
// So the rarity of the columns a peer custodies can be simply added together for a score
// representing how unique their custody groups are; rarer columns contribute larger values to scores.
rarity map[uint64]float64
asc []uint64 // columns indices ordered by ascending rarity
}
// newRarityRanker precomputes data used for scoring and ranking. It should be reinitialized every time
// we refresh the set of peers or the view of the peers column custody.
func newRarityRanker(toCustody peerdas.ColumnIndices, custodians map[uint64][]*dasPeer) *rarityRanker {
rarity := make(map[uint64]float64, len(toCustody))
asc := make([]uint64, 0, len(toCustody))
for col := range toCustody.ToMap() {
rarity[col] = 1 / max(1, float64(len(custodians[col])))
asc = append(asc, col)
}
slices.SortFunc(asc, func(a, b uint64) int {
return cmp.Compare(rarity[a], rarity[b])
})
return &rarityRanker{rarity: rarity, asc: asc}
}
// ascendingRarity returns the requested columns sorted by ascending rarity.
func (rr *rarityRanker) ascendingRarity(cols peerdas.ColumnIndices) []uint64 {
ranked := make([]uint64, 0, len(cols))
for _, col := range rr.asc {
if cols.Has(col) {
ranked = append(ranked, col)
}
}
return ranked
}
// score gives a score representing the sum of the rarity scores of the given columns. It can be used to
// score peers based on the set intersection of their custodied indices and the indices we need to request.
func (rr *rarityRanker) score(coverage peerdas.ColumnIndices) float64 {
score := 0.0
for col := range coverage.ToMap() {
score += rr.rarity[col]
}
return score
}
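// rarityScoreExample is an illustrative sketch (not used by production code) of how
// per-column rarity values combine into a score. With columns 5 and 10 needed, one
// custodian for column 5 and three for column 10, rarity[5] = 1.0 and rarity[10] = 1/3,
// so a peer covering both columns scores ~1.33 while a peer covering only column 10
// scores ~0.33 and is a better candidate to spend on block requests instead.
func rarityScoreExample() float64 {
toCustody := peerdas.ColumnIndices{5: struct{}{}, 10: struct{}{}}
custodians := map[uint64][]*dasPeer{
5:  {&dasPeer{}},                         // one custodian
10: {&dasPeer{}, &dasPeer{}, &dasPeer{}}, // three custodians
}
rr := newRarityRanker(toCustody, custodians)
return rr.score(toCustody) // 1.0 + 1/3 ≈ 1.33
}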

View File

@@ -72,9 +72,10 @@ func (f *blocksFetcher) waitForMinimumPeers(ctx context.Context) ([]peer.ID, err
}
var peers []peer.ID
if f.mode == modeStopOnFinalizedEpoch {
cp := f.chain.FinalizedCheckpt()
headEpoch := cp.Epoch
_, peers = f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
_, peers = f.p2p.Peers().BestFinalized(f.chain.FinalizedCheckpt().Epoch)
if len(peers) > params.BeaconConfig().MaxPeersToSync {
peers = peers[:params.BeaconConfig().MaxPeersToSync]
}
} else {
headEpoch := slots.ToEpoch(f.chain.HeadSlot())
_, peers = f.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, headEpoch)

View File

@@ -516,7 +516,10 @@ func TestBlocksFetcher_requestBeaconBlocksByRange(t *testing.T) {
p2p: p2p,
})
_, peerIDs := p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, slots.ToEpoch(mc.HeadSlot()))
_, peerIDs := p2p.Peers().BestFinalized(slots.ToEpoch(mc.HeadSlot()))
if len(peerIDs) > params.BeaconConfig().MaxPeersToSync {
peerIDs = peerIDs[:params.BeaconConfig().MaxPeersToSync]
}
req := &ethpb.BeaconBlocksByRangeRequest{
StartSlot: 1,
Step: 1,

View File

@@ -331,9 +331,7 @@ func (f *blocksFetcher) findAncestor(ctx context.Context, pid peer.ID, b interfa
// bestFinalizedSlot returns the highest finalized slot of the majority of connected peers.
func (f *blocksFetcher) bestFinalizedSlot() primitives.Slot {
cp := f.chain.FinalizedCheckpt()
finalizedEpoch, _ := f.p2p.Peers().BestFinalized(
params.BeaconConfig().MaxPeersToSync, cp.Epoch)
finalizedEpoch, _ := f.p2p.Peers().BestFinalized(f.chain.FinalizedCheckpt().Epoch)
return params.BeaconConfig().SlotsPerEpoch.Mul(uint64(finalizedEpoch))
}
@@ -350,7 +348,10 @@ func (f *blocksFetcher) calculateHeadAndTargetEpochs() (headEpoch, targetEpoch p
if f.mode == modeStopOnFinalizedEpoch {
cp := f.chain.FinalizedCheckpt()
headEpoch = cp.Epoch
targetEpoch, peers = f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
targetEpoch, peers = f.p2p.Peers().BestFinalized(headEpoch)
if len(peers) > params.BeaconConfig().MaxPeersToSync {
peers = peers[:params.BeaconConfig().MaxPeersToSync]
}
return headEpoch, targetEpoch, peers
}

View File

@@ -28,10 +28,10 @@ const (
)
// blockReceiverFn defines block receiving function.
type blockReceiverFn func(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityStore) error
type blockReceiverFn func(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, avs das.AvailabilityChecker) error
// batchBlockReceiverFn defines batch receiving function.
type batchBlockReceiverFn func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityStore) error
type batchBlockReceiverFn func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityChecker) error
// Round Robin sync looks at the latest peer statuses and syncs up to the highest known epoch.
//
@@ -175,7 +175,7 @@ func (s *Service) processFetchedDataRegSync(ctx context.Context, data *blocksQue
blocksWithDataColumns := bwb[fistDataColumnIndex:]
blobBatchVerifier := verification.NewBlobBatchVerifier(s.newBlobVerifier, verification.InitsyncBlobSidecarRequirements)
lazilyPersistentStoreBlobs := das.NewLazilyPersistentStore(s.cfg.BlobStorage, blobBatchVerifier)
avs := das.NewLazilyPersistentStore(s.cfg.BlobStorage, blobBatchVerifier)
log := log.WithField("firstSlot", data.bwb[0].Block.Block().Slot())
logBlobs, logDataColumns := log, log
@@ -185,12 +185,12 @@ func (s *Service) processFetchedDataRegSync(ctx context.Context, data *blocksQue
}
for i, b := range blocksWithBlobs {
if err := lazilyPersistentStoreBlobs.Persist(s.clock.CurrentSlot(), b.Blobs...); err != nil {
if err := avs.Persist(s.clock.CurrentSlot(), b.Blobs...); err != nil {
logBlobs.WithError(err).WithFields(syncFields(b.Block)).Warning("Batch failure due to BlobSidecar issues")
return uint64(i), err
}
if err := s.processBlock(ctx, s.genesisTime, b, s.cfg.Chain.ReceiveBlock, lazilyPersistentStoreBlobs); err != nil {
if err := s.processBlock(ctx, s.genesisTime, b, s.cfg.Chain.ReceiveBlock, avs); err != nil {
if errors.Is(err, errParentDoesNotExist) {
logBlobs.WithField("missingParent", fmt.Sprintf("%#x", b.Block.Block().ParentRoot())).
WithFields(syncFields(b.Block)).Debug("Could not process batch blocks due to missing parent")
@@ -313,7 +313,7 @@ func (s *Service) processBlock(
genesis time.Time,
bwb blocks.BlockWithROSidecars,
blockReceiver blockReceiverFn,
avs das.AvailabilityStore,
avs das.AvailabilityChecker,
) error {
blk := bwb.Block
blkRoot := blk.Root()

View File

@@ -376,7 +376,7 @@ func TestService_processBlock(t *testing.T) {
rowsb, err := blocks.NewROBlock(wsb)
require.NoError(t, err)
err = s.processBlock(ctx, genesis, blocks.BlockWithROSidecars{Block: rowsb}, func(
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityStore) error {
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityChecker) error {
assert.NoError(t, s.cfg.Chain.ReceiveBlock(ctx, block, blockRoot, nil))
return nil
}, nil)
@@ -388,7 +388,7 @@ func TestService_processBlock(t *testing.T) {
rowsb, err = blocks.NewROBlock(wsb)
require.NoError(t, err)
err = s.processBlock(ctx, genesis, blocks.BlockWithROSidecars{Block: rowsb}, func(
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityStore) error {
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityChecker) error {
return nil
}, nil)
assert.ErrorContains(t, errBlockAlreadyProcessed.Error(), err)
@@ -399,7 +399,7 @@ func TestService_processBlock(t *testing.T) {
rowsb, err = blocks.NewROBlock(wsb)
require.NoError(t, err)
err = s.processBlock(ctx, genesis, blocks.BlockWithROSidecars{Block: rowsb}, func(
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityStore) error {
ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, _ das.AvailabilityChecker) error {
assert.NoError(t, s.cfg.Chain.ReceiveBlock(ctx, block, blockRoot, nil))
return nil
}, nil)
@@ -469,7 +469,7 @@ func TestService_processBlockBatch(t *testing.T) {
currBlockRoot = blk1Root
}
cbnormal := func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityStore) error {
cbnormal := func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityChecker) error {
assert.NoError(t, s.cfg.Chain.ReceiveBlockBatch(ctx, blks, avs))
return nil
}
@@ -478,7 +478,7 @@ func TestService_processBlockBatch(t *testing.T) {
assert.NoError(t, err)
require.Equal(t, uint64(len(batch)), count)
cbnil := func(ctx context.Context, blocks []blocks.ROBlock, _ das.AvailabilityStore) error {
cbnil := func(ctx context.Context, blocks []blocks.ROBlock, _ das.AvailabilityChecker) error {
return nil
}
@@ -851,7 +851,7 @@ func TestService_processBlocksWithDataColumns(t *testing.T) {
counter: ratecounter.NewRateCounter(counterSeconds * time.Second),
}
receiverFunc := func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityStore) error {
receiverFunc := func(ctx context.Context, blks []blocks.ROBlock, avs das.AvailabilityChecker) error {
require.Equal(t, 1, len(blks))
return nil
}

View File

@@ -297,7 +297,10 @@ func (s *Service) handleBlockProcessingError(ctx context.Context, err error, b i
// getBestPeers returns the list of best peers based on finalized checkpoint epoch.
func (s *Service) getBestPeers() []core.PeerID {
_, bestPeers := s.cfg.p2p.Peers().BestFinalized(maxPeerRequest, s.cfg.chain.FinalizedCheckpt().Epoch)
_, bestPeers := s.cfg.p2p.Peers().BestFinalized(s.cfg.chain.FinalizedCheckpt().Epoch)
if len(bestPeers) > maxPeerRequest {
bestPeers = bestPeers[:maxPeerRequest]
}
return bestPeers
}

View File

@@ -46,6 +46,12 @@ var (
errDataColumnChunkedReadFailure = errors.New("failed to read stream of chunk-encoded data columns")
errMaxRequestDataColumnSidecarsExceeded = errors.New("count of requested data column sidecars exceeds MAX_REQUEST_DATA_COLUMN_SIDECARS")
errMaxResponseDataColumnSidecarsExceeded = errors.New("peer returned more data column sidecars than requested")
errSidecarRPCValidation = errors.Wrap(ErrInvalidFetchedData, "DataColumnSidecar")
errSidecarSlotsUnordered = errors.Wrap(errSidecarRPCValidation, "slots not in ascending order")
errSidecarIndicesUnordered = errors.Wrap(errSidecarRPCValidation, "sidecar indices not in ascending order")
errSidecarSlotNotRequested = errors.Wrap(errSidecarRPCValidation, "sidecar slot not in range")
errSidecarIndexNotRequested = errors.Wrap(errSidecarRPCValidation, "sidecar index not requested")
)
// ------
@@ -459,6 +465,7 @@ func SendDataColumnSidecarsByRangeRequest(
p DataColumnSidecarsParams,
pid peer.ID,
request *ethpb.DataColumnSidecarsByRangeRequest,
vfs ...DataColumnResponseValidation,
) ([]blocks.RODataColumn, error) {
// Return early if nothing to request.
if request == nil || request.Count == 0 || len(request.Columns) == 0 {
@@ -512,6 +519,16 @@ func SendDataColumnSidecarsByRangeRequest(
}
defer closeStream(stream, log)
requestedSlot, err := isSidecarSlotRequested(request)
if err != nil {
return nil, errors.Wrap(err, "is sidecar slot within bounds")
}
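// Prepend the spec-mandated validations so they always run before any caller-supplied validators.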
vfs = append([]DataColumnResponseValidation{
areSidecarsOrdered(),
isSidecarIndexRequested(request),
requestedSlot,
}, vfs...)
// Read the data column sidecars from the stream.
roDataColumns := make([]blocks.RODataColumn, 0, totalCount)
for range totalCount {
@@ -520,20 +537,7 @@ func SendDataColumnSidecarsByRangeRequest(
return nil, err
}
validatorSlotWithinBounds, err := isSidecarSlotWithinBounds(request)
if err != nil {
if p.DownscorePeerOnRPCFault {
downscorePeer(p.P2P, pid, "servedSidecarSlotOutOfBounds")
}
return nil, errors.Wrap(err, "is sidecar slot within bounds")
}
roDataColumn, err := readChunkedDataColumnSidecar(
stream, p.P2P, p.CtxMap,
validatorSlotWithinBounds,
isSidecarIndexRequested(request),
)
roDataColumn, err := readChunkedDataColumnSidecar(stream, p.P2P, p.CtxMap, vfs...)
if errors.Is(err, io.EOF) {
if p.DownscorePeerOnRPCFault && len(roDataColumns) == 0 {
downscorePeer(p.P2P, pid, "noReturnedSidecar")
@@ -568,8 +572,8 @@ func SendDataColumnSidecarsByRangeRequest(
return roDataColumns, nil
}
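For callers, the new variadic vfs parameter is a hook for extra per-sidecar checks. A hypothetical example (the validator below is not part of this change and only uses Slot(), which the built-in checks also rely on):
	// Hypothetical extra validation. Passed as a trailing argument to
	// SendDataColumnSidecarsByRangeRequest, it runs in addition to the built-in
	// ordering, index, and slot checks, which are prepended to vfs above.
	var notGenesis DataColumnResponseValidation = func(sc blocks.RODataColumn) error {
		if sc.Slot() == 0 {
			return errors.New("unexpected genesis-slot sidecar")
		}
		return nil
	}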
// isSidecarSlotWithinBounds verifies that the slot of the data column sidecar is within the bounds of the request.
func isSidecarSlotWithinBounds(request *ethpb.DataColumnSidecarsByRangeRequest) (DataColumnResponseValidation, error) {
// isSidecarSlotRequested verifies that the slot of the data column sidecar is within the bounds of the request.
func isSidecarSlotRequested(request *ethpb.DataColumnSidecarsByRangeRequest) (DataColumnResponseValidation, error) {
// endSlot is exclusive (while request.StartSlot is inclusive).
endSlot, err := request.StartSlot.SafeAdd(request.Count)
if err != nil {
@@ -580,7 +584,7 @@ func isSidecarSlotWithinBounds(request *ethpb.DataColumnSidecarsByRangeRequest)
slot := sidecar.Slot()
if !(request.StartSlot <= slot && slot < endSlot) {
return errors.Errorf("data column sidecar slot %d out of range [%d, %d[", slot, request.StartSlot, endSlot)
return errors.Wrapf(errSidecarSlotNotRequested, "got=%d, want=[%d, %d)", slot, request.StartSlot, endSlot)
}
return nil
@@ -589,6 +593,29 @@ func isSidecarSlotWithinBounds(request *ethpb.DataColumnSidecarsByRangeRequest)
return validator, nil
}
// areSidecarsOrdered enforces the p2p spec rule:
// "The following data column sidecars, where they exist, MUST be sent in (slot, column_index) order."
// via https://github.com/ethereum/consensus-specs/blob/master/specs/fulu/p2p-interface.md#datacolumnsidecarsbyrange-v1
func areSidecarsOrdered() DataColumnResponseValidation {
var prevSlot primitives.Slot
var prevIdx uint64
return func(sidecar blocks.RODataColumn) error {
if sidecar.Slot() < prevSlot {
return errors.Wrapf(errSidecarSlotsUnordered, "got=%d, want>=%d", sidecar.Slot(), prevSlot)
}
if sidecar.Slot() > prevSlot {
prevIdx = 0 // reset index tracking for new slot
prevSlot = sidecar.Slot() // move slot tracking to new slot
}
if sidecar.Index < prevIdx {
return errors.Wrapf(errSidecarIndicesUnordered, "got=%d, want>=%d", sidecar.Index, prevIdx)
}
prevIdx = sidecar.Index
return nil
}
}
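A standalone sketch of the stateful (slot, column_index) ordering rule above (the pair type and sample values are stand-ins for blocks.RODataColumn, not part of this change):
package main

import "fmt"

// pair stands in for the (slot, column_index) key the p2p spec orders by.
type pair struct{ slot, idx uint64 }

// orderedCheck mirrors the closure logic of areSidecarsOrdered.
func orderedCheck() func(pair) error {
	var prevSlot, prevIdx uint64
	return func(p pair) error {
		if p.slot < prevSlot {
			return fmt.Errorf("slots unordered: got=%d, want>=%d", p.slot, prevSlot)
		}
		if p.slot > prevSlot {
			prevIdx = 0 // reset index tracking for the new slot
			prevSlot = p.slot
		}
		if p.idx < prevIdx {
			return fmt.Errorf("indices unordered: got=%d, want>=%d", p.idx, prevIdx)
		}
		prevIdx = p.idx
		return nil
	}
}

func main() {
	check := orderedCheck()
	for _, p := range []pair{{5, 0}, {5, 1}, {6, 0}, {6, 2}, {6, 1}} {
		fmt.Printf("%v -> %v\n", p, check(p)) // only the final (6,1) fails: the index moved backwards within slot 6
	}
}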
// isSidecarIndexRequested verifies that the index of the data column sidecar is found in the requested indices.
func isSidecarIndexRequested(request *ethpb.DataColumnSidecarsByRangeRequest) DataColumnResponseValidation {
requestedIndices := make(map[uint64]bool)
@@ -600,7 +627,7 @@ func isSidecarIndexRequested(request *ethpb.DataColumnSidecarsByRangeRequest) Da
columnIndex := sidecar.Index
if !requestedIndices[columnIndex] {
requested := helpers.SortedPrettySliceFromMap(requestedIndices)
return errors.Errorf("data column sidecar index %d returned by the peer but not found in requested indices %v", columnIndex, requested)
return errors.Wrapf(errSidecarIndexNotRequested, "%d not in %v", columnIndex, requested)
}
return nil
@@ -787,3 +814,11 @@ func downscorePeer(p2p p2p.P2P, peerID peer.ID, reason string, fields ...logrus.
newScore := p2p.Peers().Scorers().BadResponsesScorer().Increment(peerID)
log.WithFields(logrus.Fields{"peerID": peerID, "reason": reason, "newScore": newScore}).Debug("Downscore peer")
}
func DataColumnSidecarsByRangeRequest(columns []uint64, start, end primitives.Slot) *ethpb.DataColumnSidecarsByRangeRequest {
return &ethpb.DataColumnSidecarsByRangeRequest{
StartSlot: start,
Count: uint64(end-start) + 1,
Columns: columns,
}
}
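A usage sketch of the new request constructor together with the renamed slot validation (slot and column values are illustrative, not from this change). Note that end is treated as inclusive, so Count = end - start + 1, which lines up with the exclusive endSlot = StartSlot + Count inside isSidecarSlotRequested:
	req := DataColumnSidecarsByRangeRequest([]uint64{0, 1, 2}, 100, 131)
	// req.StartSlot == 100, req.Count == 32, req.Columns == [0, 1, 2]
	validate, err := isSidecarSlotRequested(req)
	if err != nil {
		// only possible when StartSlot + Count overflows
		panic(err)
	}
	_ = validate // accepts sidecars with slots in [100, 132), i.e. 100 through 131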

View File

@@ -1071,7 +1071,7 @@ func TestIsSidecarSlotWithinBounds(t *testing.T) {
Count: 10,
}
validator, err := isSidecarSlotWithinBounds(request)
validator, err := isSidecarSlotRequested(request)
require.NoError(t, err)
testCases := []struct {

View File

@@ -0,0 +1,5 @@
### Added
- Data column backfill.
### Changed
- Changed default value for `--backfill-batch-size` from 32 to 4.
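Operators who prefer the previous batching behavior can restore it by passing `--backfill-batch-size=32` explicitly (the flag name is taken from the entry above; the larger value trades higher per-request data column overhead for fewer round trips).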

View File

@@ -286,6 +286,20 @@ func MaxSafeEpoch() primitives.Epoch {
return primitives.Epoch(math.MaxUint64 / uint64(params.BeaconConfig().SlotsPerEpoch))
}
// SafeEpochStartOrMax returns the start slot of the given epoch if it will not overflow.
// Otherwise it takes the highest epoch that won't overflow and, to introduce a little
// margin for error, returns the slot beginning the prior epoch.
func SafeEpochStartOrMax(e primitives.Epoch) primitives.Slot {
// The max value converted to a slot can't be the start of a conceptual epoch,
// because the first slot of that epoch would overflow,
// so use the start slot of the epoch right before that value.
me := MaxSafeEpoch() - 1
if e > me {
return UnsafeEpochStart(me)
}
return UnsafeEpochStart(e)
}
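A brief usage sketch of the clamping behavior (epoch values are illustrative):
	// Ordinary epochs are unaffected: the result is just the epoch's first slot.
	_ = SafeEpochStartOrMax(10) // == UnsafeEpochStart(10)
	// Epochs above MaxSafeEpoch()-1 are clamped to the start of MaxSafeEpoch()-1,
	// leaving one epoch of headroom below the overflow boundary.
	_ = SafeEpochStartOrMax(primitives.Epoch(math.MaxUint64)) // == UnsafeEpochStart(MaxSafeEpoch() - 1)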
// SecondsUntilNextEpochStart returns how many seconds until the next Epoch start from the current time and slot
func SecondsUntilNextEpochStart(genesis time.Time) (uint64, error) {
currentSlot := CurrentSlot(genesis)