Retry logic for getBlobsV2 in peerDAS (#15520)

* PeerDAS: Implement sync

* Fix Potuz's comment.

* Fix Potuz's comment.

* Fix Potuz's comment.

* Fix Potuz's comment.

* Fix Potuz's comment.

* Implement `TestFetchDataColumnSidecarsFromPeers`.

* Implement `TestSelectPeers`.

* Fix James' comment.

* Fix flakiness in `TestSelectPeers`.

* Revert "Fix Potuz's comment."

This reverts commit c45230b455.

* Revert "Fix James' comment."

This reverts commit a3f919205a.

* `selectPeers`: Avoid map with key but empty value.

* Fix Potuz's comment.

* Add DataColumnStorage and SubscribeAllDataSubnets flag.

* getBlobsV2: retry if reconstruction isn't successful

* test: engine client and sync package, metrics

* lint: fmt and log capitalisation

* lint: return error when it is not nil

* config: make retry interval configurable

* sidecar: recover function and different context for retrying

* lint: remove unused field

* beacon: default retry interval

* reconstruct: load once, correctly deliver the result to all waiting goroutines

* reconstruct: simplify multi goroutine case and avoid race condition

* engine: remove isDataAlreadyAvailable function

* sync: no goroutine, getblobsv2 in absence of block as well, wrap error

* exec: hardcode retry interval

* da: non blocking checks

* sync: remove unwanted checks

* execution: fix test

* execution: retry atomicity test

* da: updated IsDataAvailable

* sync: remove unwanted tests

* bazel: bazel run //:gazelle -- fix

* blockchain: fix CustodyGroupCount return

* lint: formatting

* lint: lint and use unused metrics

* execution: retry logic inside ReconstructDataColumnSidecars itself

* lint: format

* execution: ensure the retry actually happens when it needs to

* execution: ensure single responsibility, execution should not do DA check

* sync: don't call ReconstructDataColumnSidecars if not required

* blockchain: move IsDataAvailable interface to blockchain package

* execution: make reconstructSingleflight part of the service struct

* blockchain: cleaner DA check

* lint: formatting and remove confusing comment

* sync: fix lint, test and add extra test for when data is actually not available

* sync: new appropriate mock service

* execution: edge case - delete activeRetries on success

* execution: use service context instead of function's for retry

* blockchain: get variable samplesPerSlot only when required

* remove redundant function and fix name

* fix test

* fix more tests

* put samplesPerSlot at appropriate place

* tidy up IsDataAvailable

* correct bad merge

* fix bad merge

* remove redundant flag option

* refactor to deduplicate sidecar construction code

* - Add godocs
- Rename some functions to be closer to the spec
- Add err in return of commitments

* Replace mutating public method (but only internally used) `Populate` by private non-mutating method `extract`.

* Implement a unique `processDataColumnSidecarsFromExecution` instead of 2 separate functions from block and from sidecar.

* `ReceiveBlock`: Wrap errors.

* Remove useless tests.

* `ConstructionPopulator`: Add tests.

* Fix tests

* Move functions to be consistent with blobs.

* `fetchCellsAndProofsFromExecution`: Avoid useless flattening.

* `processDataColumnSidecarsFromExecution`: Stop using DB cache.

---------

Co-authored-by: Manu NALEPA <enalepa@offchainlabs.com>
Co-authored-by: Kasey Kirkham <kasey@users.noreply.github.com>
This commit is contained in:
satushh
2025-09-16 21:35:35 +01:00
committed by GitHub
parent a5e4fccb47
commit 600169a53b
42 changed files with 1177 additions and 738 deletions

View File

@@ -1,12 +1,76 @@
package peerdas
import (
"time"
"github.com/OffchainLabs/prysm/v6/beacon-chain/blockchain/kzg"
beaconState "github.com/OffchainLabs/prysm/v6/beacon-chain/state"
fieldparams "github.com/OffchainLabs/prysm/v6/config/fieldparams"
"github.com/OffchainLabs/prysm/v6/config/params"
"github.com/OffchainLabs/prysm/v6/consensus-types/blocks"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
ethpb "github.com/OffchainLabs/prysm/v6/proto/prysm/v1alpha1"
"github.com/pkg/errors"
)
// Sentinel errors exposed by this file.
var (
// ErrNilSignedBlockOrEmptyCellsAndProofs reports a nil signed block or empty cells and proofs.
ErrNilSignedBlockOrEmptyCellsAndProofs = errors.New("nil signed block or empty cells and proofs")
// ErrSizeMismatch is returned by DataColumnSidecars when, for a column, the number of
// KZG commitments differs from the number of cells or from the number of proofs.
ErrSizeMismatch = errors.New("mismatch in the number of blob KZG commitments and cellsAndProofs")
// ErrNotEnoughDataColumnSidecars is wrapped by rotateRowsToCols when a row does not hold
// exactly the expected number of cells, or when its cell and proof counts differ.
ErrNotEnoughDataColumnSidecars = errors.New("not enough columns")
// ErrDataColumnSidecarsNotSortedByIndex reports data column sidecars that are not sorted by index.
ErrDataColumnSidecarsNotSortedByIndex = errors.New("data column sidecars are not sorted by index")
)
// Compile-time checks that both reconstruction sources satisfy ConstructionPopulator.
var (
_ ConstructionPopulator = (*BlockReconstructionSource)(nil)
_ ConstructionPopulator = (*SidecarReconstructionSource)(nil)
)
// Identifiers returned by the Type method of each reconstruction source.
const (
// BlockType is the value returned by BlockReconstructionSource.Type.
BlockType = "BeaconBlock"
// SidecarType is the value returned by SidecarReconstructionSource.Type.
SidecarType = "DataColumnSidecar"
)
type (
// ConstructionPopulator is an interface that can be satisfied by a type that can use data from a struct
// like a DataColumnSidecar or a BeaconBlock to set the fields in a data column sidecar that cannot
// be obtained from the engine api.
ConstructionPopulator interface {
// Slot returns the slot of the source.
Slot() primitives.Slot
// Root returns the block root of the source.
Root() [fieldparams.RootLength]byte
// ProposerIndex returns the proposer index of the source.
ProposerIndex() primitives.ValidatorIndex
// Commitments returns the blob KZG commitments of the source.
Commitments() ([][]byte, error)
// Type returns a string identifying the kind of source (BlockType or SidecarType
// for the implementations in this file).
Type() string
// extract returns the block-derived fields needed to build a data column sidecar.
// Being unexported, it restricts implementations to this package.
extract() (*blockInfo, error)
}
// BlockReconstructionSource is a ConstructionPopulator that uses a beacon block as the source of data
BlockReconstructionSource struct {
blocks.ROBlock
}
// SidecarReconstructionSource is a ConstructionPopulator that uses a data column sidecar as the source of data
SidecarReconstructionSource struct {
blocks.VerifiedRODataColumn
}
// blockInfo groups the values that DataColumnSidecars copies into every sidecar
// it builds: the signed block header, the KZG commitments and their inclusion proof.
blockInfo struct {
// signedBlockHeader is assigned to DataColumnSidecar.SignedBlockHeader.
signedBlockHeader *ethpb.SignedBeaconBlockHeader
// kzgCommitments is assigned to DataColumnSidecar.KzgCommitments.
kzgCommitments [][]byte
// kzgInclusionProof is assigned to DataColumnSidecar.KzgCommitmentsInclusionProof.
kzgInclusionProof [][]byte
}
)
// PopulateFromBlock wraps the given read-only beacon block into a
// BlockReconstructionSource so it can be used as a ConstructionPopulator.
func PopulateFromBlock(block blocks.ROBlock) *BlockReconstructionSource {
	source := new(BlockReconstructionSource)
	source.ROBlock = block

	return source
}
// PopulateFromSidecar wraps the given verified data column sidecar into a
// SidecarReconstructionSource so it can be used as a ConstructionPopulator.
func PopulateFromSidecar(sidecar blocks.VerifiedRODataColumn) *SidecarReconstructionSource {
	source := new(SidecarReconstructionSource)
	source.VerifiedRODataColumn = sidecar

	return source
}
// ValidatorsCustodyRequirement returns the number of custody groups regarding the validator indices attached to the beacon node.
// https://github.com/ethereum/consensus-specs/blob/master/specs/fulu/validator.md#validator-custody
func ValidatorsCustodyRequirement(state beaconState.ReadOnlyBeaconState, validatorsIndex map[primitives.ValidatorIndex]bool) (uint64, error) {
@@ -28,3 +92,159 @@ func ValidatorsCustodyRequirement(state beaconState.ReadOnlyBeaconState, validat
count := totalNodeBalance / balancePerAdditionalCustodyGroup
return min(max(count, validatorCustodyRequirement), numberOfCustodyGroups), nil
}
// DataColumnSidecars, given ConstructionPopulator and the cells/proofs associated with each blob in the
// block, assembles sidecars which can be distributed to peers.
// This is an adapted version of
// https://github.com/ethereum/consensus-specs/blob/master/specs/fulu/validator.md#get_data_column_sidecars,
// which is designed to be used both when constructing sidecars from a block and from a sidecar, replacing
// https://github.com/ethereum/consensus-specs/blob/master/specs/fulu/validator.md#get_data_column_sidecars_from_block and
// https://github.com/ethereum/consensus-specs/blob/master/specs/fulu/validator.md#get_data_column_sidecars_from_column_sidecar
//
// rows holds one entry per blob; src supplies the signed block header, the KZG commitments,
// their inclusion proof and the block root shared by every produced sidecar.
//
// It returns (nil, nil) when rows is empty, and otherwise one sidecar per column, in column order.
// It returns ErrSizeMismatch when, for a column, the number of commitments differs from the
// number of cells or proofs, and wraps any error coming from the rotation of rows into columns,
// from src, or from the read-only sidecar construction.
// The computation time, in milliseconds, is observed in dataColumnComputationTime on success only.
func DataColumnSidecars(rows []kzg.CellsAndProofs, src ConstructionPopulator) ([]blocks.RODataColumn, error) {
	if len(rows) == 0 {
		return nil, nil
	}

	start := time.Now()

	numberOfColumns := params.BeaconConfig().NumberOfColumns

	cells, proofs, err := rotateRowsToCols(rows, numberOfColumns)
	if err != nil {
		return nil, errors.Wrap(err, "rotate cells and proofs")
	}

	// The block information is identical for every column: extract it once rather than
	// once per column (for a block source, extraction recomputes the header and the
	// KZG commitments Merkle proof on every call).
	info, err := src.extract()
	if err != nil {
		return nil, errors.Wrap(err, "extract block info")
	}

	// The block root does not depend on the column either.
	root := src.Root()

	roSidecars := make([]blocks.RODataColumn, 0, numberOfColumns)
	for idx := range numberOfColumns {
		sidecar := &ethpb.DataColumnSidecar{
			Index:                        idx,
			Column:                       cells[idx],
			KzgCommitments:               info.kzgCommitments,
			KzgProofs:                    proofs[idx],
			SignedBlockHeader:            info.signedBlockHeader,
			KzgCommitmentsInclusionProof: info.kzgInclusionProof,
		}

		// Each column must hold exactly one cell and one proof per commitment.
		if len(sidecar.KzgCommitments) != len(sidecar.Column) || len(sidecar.KzgCommitments) != len(sidecar.KzgProofs) {
			return nil, ErrSizeMismatch
		}

		roSidecar, err := blocks.NewRODataColumnWithRoot(sidecar, root)
		if err != nil {
			return nil, errors.Wrap(err, "new ro data column")
		}

		roSidecars = append(roSidecars, roSidecar)
	}

	dataColumnComputationTime.Observe(float64(time.Since(start).Milliseconds()))

	return roSidecars, nil
}
// Slot reports the slot of the wrapped beacon block.
func (s *BlockReconstructionSource) Slot() primitives.Slot {
	block := s.Block()

	return block.Slot()
}
// ProposerIndex reports the proposer index of the wrapped beacon block.
func (s *BlockReconstructionSource) ProposerIndex() primitives.ValidatorIndex {
	block := s.Block()

	return block.ProposerIndex()
}
// Commitments reads the blob KZG commitments from the body of the wrapped beacon block.
// Any error returned by the body is wrapped with the "blob KZG commitments" context.
func (s *BlockReconstructionSource) Commitments() ([][]byte, error) {
	body := s.Block().Body()

	commitments, err := body.BlobKzgCommitments()
	if err != nil {
		return nil, errors.Wrap(err, "blob KZG commitments")
	}

	return commitments, nil
}
// Type identifies this source as a beacon block by returning BlockType.
func (s *BlockReconstructionSource) Type() string {
	const sourceType = BlockType

	return sourceType
}
// extract gathers, from the wrapped beacon block, the signed block header, the blob KZG
// commitments and the Merkle inclusion proof of those commitments.
// The first failing step aborts the extraction and its error is returned wrapped.
func (s *BlockReconstructionSource) extract() (*blockInfo, error) {
	body := s.Block().Body()

	signedHeader, err := s.Header()
	if err != nil {
		return nil, errors.Wrap(err, "header")
	}

	kzgCommitments, err := body.BlobKzgCommitments()
	if err != nil {
		return nil, errors.Wrap(err, "commitments")
	}

	proof, err := blocks.MerkleProofKZGCommitments(body)
	if err != nil {
		return nil, errors.Wrap(err, "merkle proof kzg commitments")
	}

	return &blockInfo{
		signedBlockHeader: signedHeader,
		kzgCommitments:    kzgCommitments,
		kzgInclusionProof: proof,
	}, nil
}
// rotateRowsToCols transposes the per-blob (row) cells and proofs into per-column slices:
// in the returned values, the first index is the column and the second one is the row (blob).
// It returns all nil values when rows is empty, and an error wrapping
// ErrNotEnoughDataColumnSidecars when a row does not hold exactly numCols cells,
// or when its number of proofs differs from its number of cells.
func rotateRowsToCols(rows []kzg.CellsAndProofs, numCols uint64) ([][][]byte, [][][]byte, error) {
	rowCount := len(rows)
	if rowCount == 0 {
		return nil, nil, nil
	}

	// One slot per row in every column.
	cellsByColumn := make([][][]byte, numCols)
	proofsByColumn := make([][][]byte, numCols)
	for col := range numCols {
		cellsByColumn[col] = make([][]byte, rowCount)
		proofsByColumn[col] = make([][]byte, rowCount)
	}

	for rowIdx := range rows {
		row := &rows[rowIdx]

		switch {
		case uint64(len(row.Cells)) != numCols:
			return nil, nil, errors.Wrap(ErrNotEnoughDataColumnSidecars, "not enough cells")
		case len(row.Cells) != len(row.Proofs):
			return nil, nil, errors.Wrap(ErrNotEnoughDataColumnSidecars, "not enough proofs")
		}

		for col := range numCols {
			cellsByColumn[col][rowIdx] = row.Cells[col][:]
			proofsByColumn[col][rowIdx] = row.Proofs[col][:]
		}
	}

	return cellsByColumn, proofsByColumn, nil
}
// Root reports the block root of the wrapped data column sidecar.
func (s *SidecarReconstructionSource) Root() [fieldparams.RootLength]byte {
	root := s.BlockRoot()

	return root
}
// Commitments reports the blob KZG commitments carried by the wrapped data column sidecar.
// The returned error is always nil.
func (s *SidecarReconstructionSource) Commitments() ([][]byte, error) {
	commitments := s.KzgCommitments

	return commitments, nil
}
// Type identifies this source as a data column sidecar by returning SidecarType.
func (s *SidecarReconstructionSource) Type() string {
	const sourceType = SidecarType

	return sourceType
}
// extract copies, from the wrapped data column sidecar, the signed block header, the KZG
// commitments and the KZG commitments inclusion proof. The returned error is always nil.
func (s *SidecarReconstructionSource) extract() (*blockInfo, error) {
	return &blockInfo{
		signedBlockHeader: s.SignedBlockHeader,
		kzgCommitments:    s.KzgCommitments,
		kzgInclusionProof: s.KzgCommitmentsInclusionProof,
	}, nil
}