allow other pkgs to check for blobs in pruning cache (#13788)

* allow other pkgs to check for blobs in pruning cache
* address deepsource complaints
* custom error to simplify test setup
* add AllAvailable method
* make storage summary slot field private
* unit test and off-by-one fix
* remove comment with copy of tested function

---------

Co-authored-by: Kasey Kirkham <kasey@users.noreply.github.com>
2026-01-10 07:58:22 -05:00 · 2024-04-01 09:19:51 -05:00
parent 2b4bb5d890
commit 53fdd2d062
6 changed files with 329 additions and 94 deletions
--- a/beacon-chain/db/filesystem/BUILD.bazel
+++ b/beacon-chain/db/filesystem/BUILD.bazel
@@ -4,6 +4,7 @@ go_library(
    name = "go_default_library",
    srcs = [
        "blob.go",
+        "cache.go",
        "ephemeral.go",
        "log.go",
        "metrics.go",
@@ -33,6 +34,7 @@ go_test(
    name = "go_default_test",
    srcs = [
        "blob_test.go",
+        "cache_test.go",
        "pruner_test.go",
    ],
    embed = [":go_default_library"],
--- a/beacon-chain/db/filesystem/blob.go
+++ b/beacon-chain/db/filesystem/blob.go
@@ -1,6 +1,7 @@
 package filesystem

 import (
+	"context"
 	"fmt"
 	"os"
 	"path"
@@ -103,12 +104,29 @@ func (bs *BlobStorage) WarmCache() {
 		return
 	}
 	go func() {
-		if err := bs.pruner.prune(0); err != nil {
+		start := time.Now()
+		if err := bs.pruner.warmCache(); err != nil {
 			log.WithError(err).Error("Error encountered while warming up blob pruner cache")
 		}
+		log.WithField("elapsed", time.Since(start)).Info("Blob filesystem cache warm-up complete.")
 	}()
 }

+// ErrBlobStorageSummarizerUnavailable is a sentinel error returned when there is no pruner/cache available.
+// This should be used by code that optionally uses the summarizer to optimize rpc requests. Being able to
+// fall back when there is no summarizer allows client code to avoid test complexity where the summarizer doesn't matter.
+var ErrBlobStorageSummarizerUnavailable = errors.New("BlobStorage not initialized with a pruner or cache")
+
+// WaitForSummarizer blocks until the BlobStorageSummarizer is ready to use.
+// BlobStorageSummarizer is not ready immediately on node startup because it needs to sample the blob filesystem to
+// determine which blobs are available.
+// It returns ErrBlobStorageSummarizerUnavailable when bs has no pruner. If waiting on the pruner's cache fails
+// (for example because ctx is done first), that error is returned together with a nil summarizer.
+func (bs *BlobStorage) WaitForSummarizer(ctx context.Context) (BlobStorageSummarizer, error) {
+	if bs.pruner == nil {
+		return nil, ErrBlobStorageSummarizerUnavailable
+	}
+	cache, err := bs.pruner.waitForCache(ctx)
+	if err != nil {
+		// Return a literal nil: forwarding the nil *blobStorageCache would produce a non-nil interface value.
+		return nil, err
+	}
+	return cache, nil
+}
+
 // Save saves blobs given a list of sidecars.
 func (bs *BlobStorage) Save(sidecar blocks.VerifiedROBlob) error {
 	startTime := time.Now()
--- a/beacon-chain/db/filesystem/cache.go
+++ b/beacon-chain/db/filesystem/cache.go
@@ -0,0 +1,119 @@
+package filesystem
+
+import (
+	"sync"
+
+	fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
+	"github.com/prysmaticlabs/prysm/v5/config/params"
+	"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
+)
+
+// blobIndexMask records which blob indices are set: entry i is true when index i is set.
+// It is a fixed-size array of bools with one entry per index below fieldparams.MaxBlobsPerBlock, not a packed bitmask.
+type blobIndexMask [fieldparams.MaxBlobsPerBlock]bool
+
+// BlobStorageSummary represents cached information about the BlobSidecars on disk for each root the cache knows about.
+type BlobStorageSummary struct {
+	// slot is the slot most recently recorded for the root by blobStorageCache.ensure.
+	slot primitives.Slot
+	// mask marks the blob indices recorded for the root; it is read through HasIndex and AllAvailable.
+	mask blobIndexMask
+}
+
+// HasIndex reports whether the summary marks the BlobSidecar at idx as available.
+// An idx at or beyond fieldparams.MaxBlobsPerBlock is answered with false instead of an error,
+// which also keeps the mask lookup from panicking on an out-of-range index.
+func (s BlobStorageSummary) HasIndex(idx uint64) bool {
+	return idx < fieldparams.MaxBlobsPerBlock && s.mask[idx]
+}
+
+// AllAvailable reports whether every index in [0, count) is set in the summary's mask.
+// A count above fieldparams.MaxBlobsPerBlock yields false. A count of zero (or less) yields true,
+// because there is no index left to check.
+func (s BlobStorageSummary) AllAvailable(count int) bool {
+	if count > fieldparams.MaxBlobsPerBlock {
+		return false
+	}
+	for idx, present := range s.mask {
+		// Only the first count entries matter; stop once we are past them.
+		if idx >= count {
+			break
+		}
+		if !present {
+			return false
+		}
+	}
+	return true
+}
+
+// BlobStorageSummarizer can be used to receive a summary of metadata about blobs on disk for a given root.
+// The BlobStorageSummary can be used to check which indices (if any) are available for a given block by root.
+type BlobStorageSummarizer interface {
+	// Summary returns the BlobStorageSummary for the given block root.
+	Summary(root [32]byte) BlobStorageSummary
+}
+
+// blobStorageCache is an in-memory BlobStorageSummarizer: it maps a string key for a block root to the
+// BlobStorageSummary recorded for that root, and keeps a running blob count used for metrics.
+type blobStorageCache struct {
+	// mu is taken by Summary, ensure, slot and evict while they access cache.
+	mu     sync.RWMutex
+	// nBlobs is the running blob count that updateMetrics adjusts and publishes.
+	nBlobs float64
+	// cache is keyed by a string form of the block root (Summary looks entries up via rootString).
+	cache  map[string]BlobStorageSummary
+}
+
+// Compile-time check that *blobStorageCache satisfies BlobStorageSummarizer.
+var _ BlobStorageSummarizer = &blobStorageCache{}
+
+// newBlobStorageCache returns an empty blobStorageCache whose map is pre-sized with a capacity hint of
+// MinEpochsForBlobsSidecarsRequest (from the beacon config) multiplied by fieldparams.SlotsPerEpoch.
+func newBlobStorageCache() *blobStorageCache {
+	sizeHint := params.BeaconConfig().MinEpochsForBlobsSidecarsRequest * fieldparams.SlotsPerEpoch
+	return &blobStorageCache{cache: make(map[string]BlobStorageSummary, sizeHint)}
+}
+
+// Summary looks up the BlobStorageSummary cached for `root` under a read lock. The result can be used to
+// check for the presence of BlobSidecars by index; a root the cache does not know yields the zero-value summary.
+func (s *blobStorageCache) Summary(root [32]byte) BlobStorageSummary {
+	key := rootString(root)
+	s.mu.RLock()
+	summary := s.cache[key]
+	s.mu.RUnlock()
+	return summary
+}
+
+// ensure records that the blob at idx exists for key, creating the entry if needed, and overwrites the
+// entry's slot with the given slot. The blob metrics are bumped by one only when idx was not already set.
+// It returns errIndexOutOfBounds when idx is not below fieldparams.MaxBlobsPerBlock.
+func (s *blobStorageCache) ensure(key string, slot primitives.Slot, idx uint64) error {
+	if idx >= fieldparams.MaxBlobsPerBlock {
+		return errIndexOutOfBounds
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	// A missing key yields the zero-value summary, which is exactly a fresh entry.
+	entry := s.cache[key]
+	entry.slot = slot
+	if !entry.mask[idx] {
+		entry.mask[idx] = true
+		s.updateMetrics(1)
+	}
+	// Summaries are stored by value, so the modified copy has to be written back.
+	s.cache[key] = entry
+	return nil
+}
+
+// slot returns the slot cached for key and true, or zero and false when the cache has no entry for key.
+func (s *blobStorageCache) slot(key string) (primitives.Slot, bool) {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	if entry, found := s.cache[key]; found {
+		return entry.slot, true
+	}
+	return 0, false
+}
+
+// evict removes the entry for key from the cache and lowers the blob metrics by the number of
+// indices that were set in the evicted entry's mask. Evicting an unknown key is a no-op.
+// The whole operation, including the metrics update, runs under s.mu: updateMetrics writes
+// s.nBlobs, which ensure also modifies while holding the lock, so updating it after unlocking would race.
+func (s *blobStorageCache) evict(key string) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	v, ok := s.cache[key]
+	if !ok {
+		return
+	}
+	var deleted float64
+	for i := range v.mask {
+		if v.mask[i] {
+			deleted++
+		}
+	}
+	delete(s.cache, key)
+	if deleted > 0 {
+		s.updateMetrics(-deleted)
+	}
+}
+
+// updateMetrics adds delta to the running blob count and publishes the new totals:
+// blobDiskCount is set to the count and blobDiskSize to the count multiplied by bytesPerSidecar.
+// It does not lock s.mu itself even though it writes s.nBlobs.
+// NOTE(review): presumably callers are expected to hold s.mu — confirm that every call site does.
+func (s *blobStorageCache) updateMetrics(delta float64) {
+	s.nBlobs += delta
+	blobDiskCount.Set(s.nBlobs)
+	blobDiskSize.Set(s.nBlobs * bytesPerSidecar)
+}
--- a/beacon-chain/db/filesystem/cache_test.go
+++ b/beacon-chain/db/filesystem/cache_test.go
@@ -0,0 +1,150 @@
+package filesystem
+
+import (
+	"testing"
+
+	fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
+	"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
+	"github.com/prysmaticlabs/prysm/v5/testing/require"
+)
+
+// TestSlotByRoot_Summary seeds a blobStorageCache with one summary per test case (keyed by a root derived
+// from the case name) and checks that Summary(...).HasIndex reports exactly the indices set in the expected mask.
+func TestSlotByRoot_Summary(t *testing.T) {
+	var noneSet, allSet, firstSet, lastSet, oneSet blobIndexMask
+	firstSet[0] = true
+	lastSet[len(lastSet)-1] = true
+	oneSet[1] = true
+	for i := range allSet {
+		allSet[i] = true
+	}
+	cases := []struct {
+		name     string
+		root     [32]byte
+		expected *blobIndexMask
+	}{
+		{
+			// expected is left nil: this case is never written to the cache.
+			name: "not found",
+		},
+		{
+			name:     "none set",
+			expected: &noneSet,
+		},
+		{
+			name:     "index 1 set",
+			expected: &oneSet,
+		},
+		{
+			name:     "all set",
+			expected: &allSet,
+		},
+		{
+			name:     "first set",
+			expected: &firstSet,
+		},
+		{
+			name:     "last set",
+			expected: &lastSet,
+		},
+	}
+	sc := newBlobStorageCache()
+	// Populate the cache directly, skipping cases that should be absent.
+	for _, c := range cases {
+		if c.expected != nil {
+			key := rootString(bytesutil.ToBytes32([]byte(c.name)))
+			sc.cache[key] = BlobStorageSummary{slot: 0, mask: *c.expected}
+		}
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			key := bytesutil.ToBytes32([]byte(c.name))
+			sum := sc.Summary(key)
+			// Ranging over a (possibly nil) pointer to an array with only the index variable never
+			// dereferences the pointer, so this visits every valid index even for the "not found" case.
+			for i := range c.expected {
+				ui := uint64(i)
+				if c.expected == nil {
+					require.Equal(t, false, sum.HasIndex(ui))
+				} else {
+					require.Equal(t, c.expected[i], sum.HasIndex(ui))
+				}
+			}
+		})
+	}
+}
+
+// TestAllAvailable builds a BlobStorageSummary with the indices in idxSet marked present and checks that
+// AllAvailable(count) matches the expected result, including the zero-count and out-of-bounds cases.
+func TestAllAvailable(t *testing.T) {
+	// idxUpTo returns the indices 0..u-1, i.e. a fully populated index set of size u.
+	idxUpTo := func(u int) []int {
+		r := make([]int, u)
+		for i := range r {
+			r[i] = i
+		}
+		return r
+	}
+	// Sanity-check the helper itself before relying on it in the table below.
+	require.DeepEqual(t, []int{}, idxUpTo(0))
+	require.DeepEqual(t, []int{0}, idxUpTo(1))
+	require.DeepEqual(t, []int{0, 1, 2, 3, 4, 5}, idxUpTo(6))
+	cases := []struct {
+		name   string
+		idxSet []int
+		count  int
+		aa     bool
+	}{
+		{
+			// If there are no blobs committed, then all the committed blobs are available.
+			name:  "none in idx, 0 arg",
+			count: 0,
+			aa:    true,
+		},
+		{
+			name:  "none in idx, 1 arg",
+			count: 1,
+			aa:    false,
+		},
+		{
+			name:   "first in idx, 1 arg",
+			idxSet: []int{0},
+			count:  1,
+			aa:     true,
+		},
+		{
+			name:   "second in idx, 1 arg",
+			idxSet: []int{1},
+			count:  1,
+			aa:     false,
+		},
+		{
+			name:   "first missing, 2 arg",
+			idxSet: []int{1},
+			count:  2,
+			aa:     false,
+		},
+		{
+			// The name states the count actually passed (6), so a failure points at the right case.
+			name:  "all missing, 6 arg",
+			count: 6,
+			aa:    false,
+		},
+		{
+			name:  "out of bound is safe",
+			count: fieldparams.MaxBlobsPerBlock + 1,
+			aa:    false,
+		},
+		{
+			name:   "max present",
+			count:  fieldparams.MaxBlobsPerBlock,
+			idxSet: idxUpTo(fieldparams.MaxBlobsPerBlock),
+			aa:     true,
+		},
+		{
+			name:   "one present",
+			count:  1,
+			idxSet: idxUpTo(1),
+			aa:     true,
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			var mask blobIndexMask
+			for _, idx := range c.idxSet {
+				mask[idx] = true
+			}
+			sum := BlobStorageSummary{mask: mask}
+			require.Equal(t, c.aa, sum.AllAvailable(c.count))
+		})
+	}
+}
--- a/beacon-chain/db/filesystem/pruner.go
+++ b/beacon-chain/db/filesystem/pruner.go
@@ -1,6 +1,7 @@
 package filesystem

 import (
+	"context"
 	"encoding/binary"
 	"io"
 	"path"
@@ -12,7 +13,6 @@ import (
 	"time"

 	"github.com/pkg/errors"
-	fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
 	"github.com/prysmaticlabs/prysm/v5/config/params"
 	"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
 	"github.com/prysmaticlabs/prysm/v5/time/slots"
@@ -32,7 +32,8 @@ type blobPruner struct {
 	sync.Mutex
 	prunedBefore atomic.Uint64
 	windowSize   primitives.Slot
-	slotMap      *slotForRoot
+	cache        *blobStorageCache
+	cacheWarmed  chan struct{}
 	fs           afero.Fs
 }

@@ -41,13 +42,14 @@ func newBlobPruner(fs afero.Fs, retain primitives.Epoch) (*blobPruner, error) {
 	if err != nil {
 		return nil, errors.Wrap(err, "could not set retentionSlots")
 	}
-	return &blobPruner{fs: fs, windowSize: r, slotMap: newSlotForRoot()}, nil
+	cw := make(chan struct{})
+	return &blobPruner{fs: fs, windowSize: r, cache: newBlobStorageCache(), cacheWarmed: cw}, nil
 }

 // notify updates the pruner's view of root->blob mappings. This allows the pruner to build a cache
 // of root->slot mappings and decide when to evict old blobs based on the age of present blobs.
 func (p *blobPruner) notify(root [32]byte, latest primitives.Slot, idx uint64) error {
-	if err := p.slotMap.ensure(rootString(root), latest, idx); err != nil {
+	if err := p.cache.ensure(rootString(root), latest, idx); err != nil {
 		return err
 	}
 	pruned := uint64(windowMin(latest, p.windowSize))
@@ -62,7 +64,7 @@ func (p *blobPruner) notify(root [32]byte, latest primitives.Slot, idx uint64) e
 	return nil
 }

-func windowMin(latest primitives.Slot, offset primitives.Slot) primitives.Slot {
+func windowMin(latest, offset primitives.Slot) primitives.Slot {
 	// Safely compute the first slot in the epoch for the latest slot
 	latest = latest - latest%params.BeaconConfig().SlotsPerEpoch
 	if latest < offset {
@@ -71,6 +73,23 @@ func windowMin(latest primitives.Slot, offset primitives.Slot) primitives.Slot {
 	return latest - offset
 }

+// warmCache calls p.prune(0) and, when that succeeds, closes cacheWarmed so that waitForCache stops blocking.
+// If prune fails, its error is returned and cacheWarmed stays open, so waiters are only released by their context.
+// It must not succeed more than once per pruner: closing an already-closed channel panics.
+func (p *blobPruner) warmCache() error {
+	err := p.prune(0)
+	if err == nil {
+		close(p.cacheWarmed)
+	}
+	return err
+}
+
+// waitForCache blocks until cacheWarmed is closed and then returns the pruner's cache.
+// If ctx is done first, it returns a nil cache together with ctx.Err().
+func (p *blobPruner) waitForCache(ctx context.Context) (*blobStorageCache, error) {
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	case <-p.cacheWarmed:
+		return p.cache, nil
+	}
+}
+
 // Prune prunes blobs in the base directory based on the retention epoch.
 // It deletes blobs older than currentEpoch - (retentionEpochs+bufferEpochs).
 // This is so that we keep a slight buffer and blobs are deleted after n+2 epochs.
@@ -122,7 +141,7 @@ func shouldRetain(slot, pruneBefore primitives.Slot) bool {

 func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int, error) {
 	root := rootFromDir(dir)
-	slot, slotCached := p.slotMap.slot(root)
+	slot, slotCached := p.cache.slot(root)
 	// Return early if the slot is cached and doesn't need pruning.
 	if slotCached && shouldRetain(slot, pruneBefore) {
 		return 0, nil
@@ -151,7 +170,7 @@ func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int,
 			if err != nil {
 				return 0, errors.Wrapf(err, "index could not be determined for blob file %s", scFiles[i])
 			}
-			if err := p.slotMap.ensure(root, slot, idx); err != nil {
+			if err := p.cache.ensure(root, slot, idx); err != nil {
 				return 0, errors.Wrapf(err, "could not update prune cache for blob file %s", scFiles[i])
 			}
 		}
@@ -179,7 +198,7 @@ func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int,
 		return removed, errors.Wrapf(err, "unable to remove blob directory %s", dir)
 	}

-	p.slotMap.evict(rootFromDir(dir))
+	p.cache.evict(rootFromDir(dir))
 	return len(scFiles), nil
 }

@@ -269,71 +288,3 @@ func filterSsz(s string) bool {
 func filterPart(s string) bool {
 	return filepath.Ext(s) == dotPartExt
 }
-
-func newSlotForRoot() *slotForRoot {
-	return &slotForRoot{
-		cache: make(map[string]*slotCacheEntry, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest*fieldparams.SlotsPerEpoch),
-	}
-}
-
-type slotCacheEntry struct {
-	slot primitives.Slot
-	mask [fieldparams.MaxBlobsPerBlock]bool
-}
-
-type slotForRoot struct {
-	sync.RWMutex
-	nBlobs float64
-	cache  map[string]*slotCacheEntry
-}
-
-func (s *slotForRoot) updateMetrics(delta float64) {
-	s.nBlobs += delta
-	blobDiskCount.Set(s.nBlobs)
-	blobDiskSize.Set(s.nBlobs * bytesPerSidecar)
-}
-
-func (s *slotForRoot) ensure(key string, slot primitives.Slot, idx uint64) error {
-	if idx >= fieldparams.MaxBlobsPerBlock {
-		return errIndexOutOfBounds
-	}
-	s.Lock()
-	defer s.Unlock()
-	v, ok := s.cache[key]
-	if !ok {
-		v = &slotCacheEntry{}
-	}
-	v.slot = slot
-	if !v.mask[idx] {
-		s.updateMetrics(1)
-	}
-	v.mask[idx] = true
-	s.cache[key] = v
-	return nil
-}
-
-func (s *slotForRoot) slot(key string) (primitives.Slot, bool) {
-	s.RLock()
-	defer s.RUnlock()
-	v, ok := s.cache[key]
-	if !ok {
-		return 0, false
-	}
-	return v.slot, ok
-}
-
-func (s *slotForRoot) evict(key string) {
-	s.Lock()
-	defer s.Unlock()
-	v, ok := s.cache[key]
-	var deleted float64
-	if ok {
-		for i := range v.mask {
-			if v.mask[i] {
-				deleted += 1
-			}
-		}
-		s.updateMetrics(-deleted)
-	}
-	delete(s.cache, key)
-}
--- a/beacon-chain/db/filesystem/pruner_test.go
+++ b/beacon-chain/db/filesystem/pruner_test.go
@@ -28,7 +28,7 @@ func TestTryPruneDir_CachedNotExpired(t *testing.T) {
 	root := fmt.Sprintf("%#x", sc.BlockRoot())
 	// This slot is right on the edge of what would need to be pruned, so by adding it to the cache and
 	// skipping any other test setup, we can be certain the hot cache path never touches the filesystem.
-	require.NoError(t, pr.slotMap.ensure(root, sc.Slot(), 0))
+	require.NoError(t, pr.cache.ensure(root, sc.Slot(), 0))
 	pruned, err := pr.tryPruneDir(root, pr.windowSize)
 	require.NoError(t, err)
 	require.Equal(t, 0, pruned)
@@ -45,7 +45,7 @@ func TestTryPruneDir_CachedExpired(t *testing.T) {
 		require.NoError(t, err)
 		root := fmt.Sprintf("%#x", sc.BlockRoot())
 		require.NoError(t, fs.Mkdir(root, directoryPermissions)) // make empty directory
-		require.NoError(t, pr.slotMap.ensure(root, sc.Slot(), 0))
+		require.NoError(t, pr.cache.ensure(root, sc.Slot(), 0))
 		pruned, err := pr.tryPruneDir(root, slot+1)
 		require.NoError(t, err)
 		require.Equal(t, 0, pruned)
@@ -63,7 +63,7 @@ func TestTryPruneDir_CachedExpired(t *testing.T) {

 		// check that the root->slot is cached
 		root := fmt.Sprintf("%#x", scs[0].BlockRoot())
-		cs, cok := bs.pruner.slotMap.slot(root)
+		cs, cok := bs.pruner.cache.slot(root)
 		require.Equal(t, true, cok)
 		require.Equal(t, slot, cs)

@@ -95,12 +95,12 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {

 		// check that the root->slot is cached
 		root := fmt.Sprintf("%#x", scs[0].BlockRoot())
-		cs, ok := bs.pruner.slotMap.slot(root)
+		cs, ok := bs.pruner.cache.slot(root)
 		require.Equal(t, true, ok)
 		require.Equal(t, slot, cs)
 		// evict it from the cache so that we trigger the file read path
-		bs.pruner.slotMap.evict(root)
-		_, ok = bs.pruner.slotMap.slot(root)
+		bs.pruner.cache.evict(root)
+		_, ok = bs.pruner.cache.slot(root)
 		require.Equal(t, false, ok)

 		// ensure that we see the saved files in the filesystem
@@ -119,7 +119,7 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {
 		fs, bs, err := NewEphemeralBlobStorageWithFs(t)
 		require.NoError(t, err)
 		// Set slot equal to the window size, so it should be retained.
-		var slot primitives.Slot = bs.pruner.windowSize
+		slot := bs.pruner.windowSize
 		_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, slot, 2)
 		scs, err := verification.BlobSidecarSliceNoop(sidecars)
 		require.NoError(t, err)
@@ -129,8 +129,8 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {

 		// Evict slot mapping from the cache so that we trigger the file read path.
 		root := fmt.Sprintf("%#x", scs[0].BlockRoot())
-		bs.pruner.slotMap.evict(root)
-		_, ok := bs.pruner.slotMap.slot(root)
+		bs.pruner.cache.evict(root)
+		_, ok := bs.pruner.cache.slot(root)
 		require.Equal(t, false, ok)

 		// Ensure that we see the saved files in the filesystem.
@@ -243,10 +243,8 @@ func TestListDir(t *testing.T) {
 	}
 	blobWithSszAndTmp := dirFiles{name: "0x1234567890", isDir: true,
 		children: []dirFiles{{name: "5.ssz"}, {name: "0.part"}}}
-	fsLayout.children = append(fsLayout.children, notABlob)
-	fsLayout.children = append(fsLayout.children, childlessBlob)
-	fsLayout.children = append(fsLayout.children, blobWithSsz)
-	fsLayout.children = append(fsLayout.children, blobWithSszAndTmp)
+	fsLayout.children = append(fsLayout.children,
+		notABlob, childlessBlob, blobWithSsz, blobWithSszAndTmp)

 	topChildren := make([]string, len(fsLayout.children))
 	for i := range fsLayout.children {
@@ -282,10 +280,7 @@ func TestListDir(t *testing.T) {
 			dirPath:  ".",
 			expected: []string{notABlob.name},
 			filter: func(s string) bool {
-				if s == notABlob.name {
-					return true
-				}
-				return false
+				return s == notABlob.name
 			},
 		},
 		{