Backfill data columns (#15580)

**What type of PR is this?**

Feature

**What does this PR do? Why is it needed?**

Adds data column support to backfill.

**Acknowledgements**

- [x] I have read
[CONTRIBUTING.md](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md).
- [x] I have included a uniquely named [changelog fragment
file](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md#maintaining-changelogmd).
- [x] I have added a description to this PR with sufficient context for
reviewers to understand this PR.

---------

Co-authored-by: Kasey <kasey@users.noreply.github.com>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Preston Van Loon <preston@pvl.dev>
Commit 61de11e2c4 · parent 2773bdef89
Authored by kasey on 2025-12-02 10:19:32 -05:00; committed by GitHub.
84 changed files with 10944 additions and 964 deletions.

Diff excerpt: the `das` package data column cache (one of the 84 changed files).

```diff
@@ -1,9 +1,7 @@
 package das

 import (
-	"bytes"
-	"slices"
-
+	"github.com/OffchainLabs/prysm/v7/beacon-chain/core/peerdas"
 	"github.com/OffchainLabs/prysm/v7/beacon-chain/db/filesystem"
 	fieldparams "github.com/OffchainLabs/prysm/v7/config/fieldparams"
 	"github.com/OffchainLabs/prysm/v7/consensus-types/blocks"
@@ -11,9 +9,9 @@ import (
 )

 var (
-	ErrDuplicateSidecar   = errors.New("duplicate sidecar stashed in AvailabilityStore")
+	errDuplicateSidecar   = errors.New("duplicate sidecar stashed in AvailabilityStore")
 	errColumnIndexTooHigh = errors.New("column index too high")
-	errCommitmentMismatch = errors.New("KzgCommitment of sidecar in cache did not match block commitment")
+	errCommitmentMismatch = errors.New("commitment of sidecar in cache did not match block commitment")
 	errMissingSidecar     = errors.New("no sidecar in cache for block commitment")
 )
```
```diff
@@ -25,107 +23,80 @@ func newDataColumnCache() *dataColumnCache {
 	return &dataColumnCache{entries: make(map[cacheKey]*dataColumnCacheEntry)}
 }

-// ensure returns the entry for the given key, creating it if it isn't already present.
-func (c *dataColumnCache) ensure(key cacheKey) *dataColumnCacheEntry {
+// entry returns the entry for the given key, creating it if it isn't already present.
+func (c *dataColumnCache) entry(key cacheKey) *dataColumnCacheEntry {
 	entry, ok := c.entries[key]
 	if !ok {
-		entry = &dataColumnCacheEntry{}
+		entry = newDataColumnCacheEntry(key.root)
 		c.entries[key] = entry
 	}
 	return entry
 }

+func (c *dataColumnCache) cleanup(blks []blocks.ROBlock) {
+	for _, block := range blks {
+		key := cacheKey{slot: block.Block().Slot(), root: block.Root()}
+		c.delete(key)
+	}
+}
+
 // delete removes the cache entry from the cache.
 func (c *dataColumnCache) delete(key cacheKey) {
 	delete(c.entries, key)
 }

-// dataColumnCacheEntry holds a fixed-length cache of BlobSidecars.
-type dataColumnCacheEntry struct {
-	scs         [fieldparams.NumberOfColumns]*blocks.RODataColumn
-	diskSummary filesystem.DataColumnStorageSummary
-}
-
-func (e *dataColumnCacheEntry) setDiskSummary(sum filesystem.DataColumnStorageSummary) {
-	e.diskSummary = sum
-}
+func (c *dataColumnCache) stash(sc blocks.RODataColumn) error {
+	key := cacheKey{slot: sc.Slot(), root: sc.BlockRoot()}
+	entry := c.entry(key)
+	return entry.stash(sc)
+}
+
+func newDataColumnCacheEntry(root [32]byte) *dataColumnCacheEntry {
+	return &dataColumnCacheEntry{scs: make(map[uint64]blocks.RODataColumn), root: &root}
+}
+
+// dataColumnCacheEntry is the set of RODataColumns for a given block.
+type dataColumnCacheEntry struct {
+	root *[32]byte
+	scs  map[uint64]blocks.RODataColumn
+}

 // stash adds an item to the in-memory cache of DataColumnSidecars.
-// Only the first DataColumnSidecar of a given Index will be kept in the cache.
-// stash will return an error if the given data colunn is already in the cache, or if the Index is out of bounds.
-func (e *dataColumnCacheEntry) stash(sc *blocks.RODataColumn) error {
+// stash will return an error if the given data column Index is out of bounds.
+// It will overwrite any existing entry for the same index.
+func (e *dataColumnCacheEntry) stash(sc blocks.RODataColumn) error {
 	if sc.Index >= fieldparams.NumberOfColumns {
 		return errors.Wrapf(errColumnIndexTooHigh, "index=%d", sc.Index)
 	}
-	if e.scs[sc.Index] != nil {
-		return errors.Wrapf(ErrDuplicateSidecar, "root=%#x, index=%d, commitment=%#x", sc.BlockRoot(), sc.Index, sc.KzgCommitments)
-	}
 	e.scs[sc.Index] = sc
 	return nil
 }
```
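One behavioral change here is easy to miss: the old array-backed entry rejected a second sidecar at the same index with `ErrDuplicateSidecar`, while the new map-backed entry silently overwrites it (and the error is demoted to the unexported `errDuplicateSidecar`). A minimal standalone sketch of the new semantics, using simplified stand-in types rather than Prysm's, and assuming `128` for `fieldparams.NumberOfColumns`:

```go
package main

import "fmt"

const numberOfColumns = 128 // stand-in for fieldparams.NumberOfColumns

// entry models the new map-backed dataColumnCacheEntry.
type entry struct {
	scs map[uint64]string // index -> sidecar (string stands in for RODataColumn)
}

// stash mirrors the new semantics: bounds-check the index, then
// unconditionally overwrite whatever is already cached there.
func (e *entry) stash(index uint64, sc string) error {
	if index >= numberOfColumns {
		return fmt.Errorf("column index too high: %d", index)
	}
	e.scs[index] = sc // no duplicate check: last write wins
	return nil
}

func main() {
	e := &entry{scs: make(map[uint64]string)}
	_ = e.stash(3, "first")
	_ = e.stash(3, "second") // previously a duplicate-sidecar error; now an overwrite
	fmt.Println(e.scs[3])    // second
}
```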
```diff
-func (e *dataColumnCacheEntry) filter(root [32]byte, commitmentsArray *safeCommitmentsArray) ([]blocks.RODataColumn, error) {
-	nonEmptyIndices := commitmentsArray.nonEmptyIndices()
-	if e.diskSummary.AllAvailable(nonEmptyIndices) {
-		return nil, nil
-	}
-	commitmentsCount := commitmentsArray.count()
-	sidecars := make([]blocks.RODataColumn, 0, commitmentsCount)
-	for i := range nonEmptyIndices {
-		if e.diskSummary.HasIndex(i) {
-			continue
-		}
-		if e.scs[i] == nil {
-			return nil, errors.Wrapf(errMissingSidecar, "root=%#x, index=%#x", root, i)
-		}
-		if !sliceBytesEqual(commitmentsArray[i], e.scs[i].KzgCommitments) {
-			return nil, errors.Wrapf(errCommitmentMismatch, "root=%#x, index=%#x, commitment=%#x, block commitment=%#x", root, i, e.scs[i].KzgCommitments, commitmentsArray[i])
-		}
-		sidecars = append(sidecars, *e.scs[i])
-	}
+// append appends the requested root and indices from the cache to the given sidecars slice and returns the result.
+// If any of the given indices are missing, an error will be returned and the sidecars slice will be unchanged.
+func (e *dataColumnCacheEntry) append(sidecars []blocks.RODataColumn, indices peerdas.ColumnIndices) ([]blocks.RODataColumn, error) {
+	needed := indices.ToMap()
+	for col := range needed {
+		_, ok := e.scs[col]
+		if !ok {
+			return nil, errors.Wrapf(errMissingSidecar, "root=%#x, index=%#x", e.root, col)
+		}
+	}
+	// Loop twice so we can avoid touching the slice if any of the blobs are missing.
+	for col := range needed {
+		sidecars = append(sidecars, e.scs[col])
+	}
 	return sidecars, nil
 }
```
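The two-loop shape in `append` is the load-bearing detail: every requested index is verified against the cache before anything is copied, so on failure the caller's slice is never partially extended. The same check-then-append pattern in isolation, as a hypothetical generic helper (not code from this PR):

```go
package main

import "fmt"

// appendAll appends src[k] for every key in keys to dst, but only if every
// key is present; otherwise it returns an error and dst is left untouched.
func appendAll[K comparable, V any](dst []V, src map[K]V, keys []K) ([]V, error) {
	// Pass 1: verify every key exists before mutating anything.
	for _, k := range keys {
		if _, ok := src[k]; !ok {
			return nil, fmt.Errorf("missing key: %v", k)
		}
	}
	// Pass 2: now the appends cannot fail partway through.
	for _, k := range keys {
		dst = append(dst, src[k])
	}
	return dst, nil
}

func main() {
	src := map[uint64]string{1: "a", 2: "b"}
	out, err := appendAll([]string{"x"}, src, []uint64{1, 2})
	fmt.Println(out, err) // [x a b] <nil>

	_, err = appendAll(out, src, []uint64{1, 3}) // 3 is missing
	fmt.Println(err)                             // missing key: 3
}
```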
```diff
-// safeCommitmentsArray is a fixed size array of commitments.
-// This is helpful for avoiding gratuitous bounds checks.
-type safeCommitmentsArray [fieldparams.NumberOfColumns][][]byte
-
-// count returns the number of commitments in the array.
-func (s *safeCommitmentsArray) count() int {
-	count := 0
-	for i := range s {
-		if s[i] != nil {
-			count++
-		}
-	}
-	return count
-}
-
-// nonEmptyIndices returns a map of indices that are non-nil in the array.
-func (s *safeCommitmentsArray) nonEmptyIndices() map[uint64]bool {
-	columns := make(map[uint64]bool)
-	for i := range s {
-		if s[i] != nil {
-			columns[uint64(i)] = true
-		}
-	}
-	return columns
-}
-
-func sliceBytesEqual(a, b [][]byte) bool {
-	return slices.EqualFunc(a, b, bytes.Equal)
-}
+// IndicesNotStored filters the list of indices to only include those that are not found in the storage summary.
+func IndicesNotStored(sum filesystem.DataColumnStorageSummary, indices peerdas.ColumnIndices) peerdas.ColumnIndices {
+	indices = indices.Copy()
+	for col := range indices {
+		if sum.HasIndex(col) {
+			indices.Unset(col)
+		}
+	}
+	return indices
+}
```
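Taken together, `IndicesNotStored` and the entry's `append` suggest a two-step retrieval flow: subtract whatever the disk summary already holds, then require the remainder to be fully present in the in-memory cache. A sketch of that flow as a hypothetical caller inside package `das`, using only identifiers visible in this diff (the function itself is not part of the PR):

```go
// sidecarsToVerify is a hypothetical illustration, not code from this PR.
// It trims wanted down to the columns not yet persisted, then pulls exactly
// those from the in-memory cache, failing if any are absent.
func sidecarsToVerify(
	entry *dataColumnCacheEntry,
	sum filesystem.DataColumnStorageSummary,
	wanted peerdas.ColumnIndices,
) ([]blocks.RODataColumn, error) {
	// Columns already on disk do not need to come from the cache.
	missing := IndicesNotStored(sum, wanted)

	// append verifies that every missing index is cached before copying
	// any of them, so a partial batch is never returned.
	return entry.append(nil, missing)
}
```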