prysm/beacon-chain/sync/backfill/columns.go
kasey 61de11e2c4 Backfill data columns (#15580)
**What type of PR is this?**

Feature

**What does this PR do? Why is it needed?**

Adds data column support to backfill.

**Acknowledgements**

- [x] I have read
[CONTRIBUTING.md](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md).
- [x] I have included a uniquely named [changelog fragment
file](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md#maintaining-changelogmd).
- [x] I have added a description to this PR with sufficient context for
reviewers to understand this PR.

---------

Co-authored-by: Kasey <kasey@users.noreply.github.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Preston Van Loon <preston@pvl.dev>
2025-12-02 15:19:32 +00:00

283 lines · 8.9 KiB · Go

package backfill

import (
	"bytes"
	"context"
	"fmt"
	"time"

	"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/das"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/p2p"
	"github.com/OffchainLabs/prysm/v7/beacon-chain/sync"
	"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
	"github.com/OffchainLabs/prysm/v7/consensus-types/primitives"
	ethpb "github.com/OffchainLabs/prysm/v7/proto/prysm/v1alpha1"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/pkg/errors"
)

var (
	errInvalidDataColumnResponse = errors.New("invalid DataColumnSidecar response")
	errUnexpectedBlockRoot       = errors.Wrap(errInvalidDataColumnResponse, "unexpected sidecar block root")
	errCommitmentLengthMismatch  = errors.Wrap(errInvalidDataColumnResponse, "sidecar has different commitment count than block")
	errCommitmentValueMismatch   = errors.Wrap(errInvalidDataColumnResponse, "sidecar commitments do not match block")
)

// columnRequestLimit tunes the number of column sidecars we try to download from peers at once.
// The spec limit is 128 * 32, but connection errors are more likely when requesting that much at once.
const columnRequestLimit = 128 * 4

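// columnBatch tracks the data column sidecars that still need to be downloaded for a
// batch of blocks, along with the column indices this node custodies and the slot range
// covered by the blocks that carry blob commitments.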
type columnBatch struct {
	first         primitives.Slot
	last          primitives.Slot
	custodyGroups peerdas.ColumnIndices
	toDownload    map[[32]byte]*toDownload
}

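// toDownload records, for a single block, the column indices that have not yet been
// downloaded, the block's blob KZG commitments, and its slot.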
type toDownload struct {
	remaining   peerdas.ColumnIndices
	commitments [][]byte
	slot        primitives.Slot
}

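// needed returns the union of column indices still missing across all blocks in the
// batch, restricted to the columns this node custodies.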
func (cs *columnBatch) needed() peerdas.ColumnIndices {
	// Make a copy that we can modify to reduce search iterations.
	search := cs.custodyGroups.ToMap()
	ci := peerdas.ColumnIndices{}
	for _, v := range cs.toDownload {
		if len(search) == 0 {
			return ci
		}
		for col := range search {
			if v.remaining.Has(col) {
				ci.Set(col)
				// Avoid iterating every single block+index by only searching for indices
				// we haven't found yet.
				delete(search, col)
			}
		}
	}
	return ci
}

// pruneExpired removes any columns from the batch that are no longer needed.
// If `pruned` is non-nil, it is populated with the roots that were removed.
func (cs *columnBatch) pruneExpired(needs das.CurrentNeeds, pruned map[[32]byte]struct{}) {
	for root, td := range cs.toDownload {
		if !needs.Col.At(td.slot) {
			delete(cs.toDownload, root)
			if pruned != nil {
				pruned[root] = struct{}{}
			}
		}
	}
}

// neededSidecarCount returns the total number of sidecars still needed to complete the batch.
func (cs *columnBatch) neededSidecarCount() int {
	count := 0
	for _, v := range cs.toDownload {
		count += v.remaining.Count()
	}
	return count
}

// neededSidecarsByColumn counts how many sidecars are still needed for each column index.
func (cs *columnBatch) neededSidecarsByColumn(peerHas peerdas.ColumnIndices) map[uint64]int {
	need := make(map[uint64]int, len(peerHas))
	for _, v := range cs.toDownload {
		for idx := range v.remaining {
			if peerHas.Has(idx) {
				need[idx]++
			}
		}
	}
	return need
}

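// columnSync wraps a columnBatch with the availability store, current slot, peer, and
// bisector state used to download, verify, and persist data column sidecars for the batch.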
type columnSync struct {
	*columnBatch
	store    *das.LazilyPersistentStoreColumn
	current  primitives.Slot
	peer     peer.ID
	bisector *columnBisector
}

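// newColumnSync builds the columnBatch for the given blocks and wraps it with a lazily
// persistent column store and a column bisector that tracks which peer served each sidecar.
// If the batch does not require any data columns, an empty columnSync is returned.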
func newColumnSync(ctx context.Context, b batch, blks verifiedROBlocks, current primitives.Slot, p p2p.P2P, cfg *workerCfg) (*columnSync, error) {
	cgc, err := p.CustodyGroupCount(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "custody group count")
	}
	cb, err := buildColumnBatch(ctx, b, blks, p, cfg.colStore, cfg.currentNeeds())
	if err != nil {
		return nil, err
	}
	if cb == nil {
		return &columnSync{}, nil
	}
	shouldRetain := func(sl primitives.Slot) bool {
		needs := cfg.currentNeeds()
		return needs.Col.At(sl)
	}
	bisector := newColumnBisector(cfg.downscore)
	return &columnSync{
		columnBatch: cb,
		current:     current,
		store:       das.NewLazilyPersistentStoreColumn(cfg.colStore, cfg.newVC, p.NodeID(), cgc, bisector, shouldRetain),
		bisector:    bisector,
	}, nil
}

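// blockColumns returns the download state for the block with the given root, or nil if
// the batch is empty or does not include that block.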
func (cs *columnSync) blockColumns(root [32]byte) *toDownload {
	if cs.columnBatch == nil {
		return nil
	}
	return cs.columnBatch.toDownload[root]
}

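// columnsNeeded returns the custodied column indices that are still missing for at least
// one block in the batch.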
func (cs *columnSync) columnsNeeded() peerdas.ColumnIndices {
	if cs.columnBatch == nil {
		return peerdas.ColumnIndices{}
	}
	return cs.columnBatch.needed()
}

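// request builds a DataColumnSidecarsByRangeRequest covering the batch's slot range for the
// given column indices, trimming the column list when the number of sidecars needed would
// exceed the request limit.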
func (cs *columnSync) request(reqCols []uint64, limit int) (*ethpb.DataColumnSidecarsByRangeRequest, error) {
	if len(reqCols) == 0 {
		return nil, nil
	}
	// Use a cheaper check to avoid allocating a map and counting sidecars if we are under the limit.
	if cs.neededSidecarCount() <= limit {
		return sync.DataColumnSidecarsByRangeRequest(reqCols, cs.first, cs.last)
	}
	// Re-slice reqCols to keep the number of requested sidecars under the limit.
	reqCount := 0
	peerHas := peerdas.NewColumnIndicesFromSlice(reqCols)
	needed := cs.neededSidecarsByColumn(peerHas)
	for i := range reqCols {
		addSidecars := needed[reqCols[i]]
		if reqCount+addSidecars > columnRequestLimit {
			reqCols = reqCols[:i]
			break
		}
		reqCount += addSidecars
	}
	return sync.DataColumnSidecarsByRangeRequest(reqCols, cs.first, cs.last)
}

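// validatingColumnRequest pairs a by-range request with the columnSync state used to
// validate and persist each sidecar in the response.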
type validatingColumnRequest struct {
	req        *ethpb.DataColumnSidecarsByRangeRequest
	columnSync *columnSync
	bisector   *columnBisector
}

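// validate runs countedValidation on a sidecar and records verification latency, download
// size, and per-column validity metrics.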
func (v *validatingColumnRequest) validate(cd blocks.RODataColumn) (err error) {
	defer func(validity string, start time.Time) {
		dataColumnSidecarVerifyMs.Observe(float64(time.Since(start).Milliseconds()))
		if err != nil {
			validity = "invalid"
		}
		dataColumnSidecarDownloadCount.WithLabelValues(fmt.Sprintf("%d", cd.Index), validity).Inc()
		dataColumnSidecarDownloadBytes.Add(float64(cd.SizeSSZ()))
	}("valid", time.Now())
	return v.countedValidation(cd)
}

// When we call Persist we get the verification checks provided by the availability store.
// In addition to those checks, this function maintains state across response values to ensure
// that each response is valid in the context of the overall request, like making sure the
// block root is one of the ones we expect based on the blocks used to construct the request.
// It also does cheap sanity checks on the DataColumnSidecar values, like ensuring that the
// commitments line up with the block.
func (v *validatingColumnRequest) countedValidation(cd blocks.RODataColumn) error {
	root := cd.BlockRoot()
	expected := v.columnSync.blockColumns(root)
	if expected == nil {
		return errors.Wrapf(errUnexpectedBlockRoot, "root=%#x, slot=%d", root, cd.Slot())
	}
	// We don't need this column, but we trust the column state machine verified we asked for it
	// as part of a range request. So we can just skip over it and not try to persist it.
	if !expected.remaining.Has(cd.Index) {
		return nil
	}
	if len(cd.KzgCommitments) != len(expected.commitments) {
		return errors.Wrapf(errCommitmentLengthMismatch, "root=%#x, slot=%d, index=%d", root, cd.Slot(), cd.Index)
	}
	for i, cmt := range cd.KzgCommitments {
		if !bytes.Equal(cmt, expected.commitments[i]) {
			return errors.Wrapf(errCommitmentValueMismatch, "root=%#x, slot=%d, index=%d", root, cd.Slot(), cd.Index)
		}
	}
	if err := v.columnSync.store.Persist(v.columnSync.current, cd); err != nil {
		return errors.Wrap(err, "persisting data column")
	}
	v.bisector.addPeerColumns(v.columnSync.peer, cd)
	expected.remaining.Unset(cd.Index)
	return nil
}

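// currentCustodiedColumns returns the set of column indices this node currently custodies,
// derived from its node ID and custody group count.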
func currentCustodiedColumns(ctx context.Context, p p2p.P2P) (peerdas.ColumnIndices, error) {
	cgc, err := p.CustodyGroupCount(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "custody group count")
	}
	peerInfo, _, err := peerdas.Info(p.NodeID(), cgc)
	if err != nil {
		return nil, errors.Wrap(err, "peer info")
	}
	return peerdas.NewColumnIndicesFromMap(peerInfo.CustodyColumns), nil
}

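// buildColumnBatch scans the verified blocks in the batch and records, for each block with
// blob commitments whose slot still requires data columns, the custodied column indices not
// already present in storage. It returns nil if the batch does not need any data columns.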
func buildColumnBatch(ctx context.Context, b batch, blks verifiedROBlocks, p p2p.P2P, store *filesystem.DataColumnStorage, needs das.CurrentNeeds) (*columnBatch, error) {
	if len(blks) == 0 {
		return nil, nil
	}
	if !needs.Col.At(b.begin) && !needs.Col.At(b.end-1) {
		return nil, nil
	}
	indices, err := currentCustodiedColumns(ctx, p)
	if err != nil {
		return nil, errors.Wrap(err, "current custodied columns")
	}
	summary := &columnBatch{
		custodyGroups: indices,
		toDownload:    make(map[[32]byte]*toDownload, len(blks)),
	}
	for _, b := range blks {
		slot := b.Block().Slot()
		if !needs.Col.At(slot) {
			continue
		}
		cmts, err := b.Block().Body().BlobKzgCommitments()
		if err != nil {
			return nil, errors.Wrap(err, "failed to get blob kzg commitments")
		}
		if len(cmts) == 0 {
			continue
		}
		// The last block this part of the loop sees will be the last one
		// we need to download data columns for.
		if len(summary.toDownload) == 0 {
			// toDownload is only empty the first time through, so this is the first block with data columns.
			summary.first = slot
		}
		summary.last = slot
		summary.toDownload[b.Root()] = &toDownload{
			remaining:   das.IndicesNotStored(store.Summary(b.Root()), indices),
			commitments: cmts,
			slot:        slot,
		}
	}
	return summary, nil
}