allow other pkgs to check for blobs in pruning cache (#13788)

* allow other pkgs to check for blobs in pruning cache

* address deepsource complaints

* custom error to simplify test setup

* add AllAvailable method

* make storage summary slot field private

* unit test and off-by-one fix

* remove comment with copy of tested function

---------

Co-authored-by: Kasey Kirkham <kasey@users.noreply.github.com>
This commit is contained in:
kasey
2024-04-01 09:19:51 -05:00
committed by GitHub
parent 2b4bb5d890
commit 53fdd2d062
6 changed files with 329 additions and 94 deletions

View File

@@ -4,6 +4,7 @@ go_library(
name = "go_default_library",
srcs = [
"blob.go",
"cache.go",
"ephemeral.go",
"log.go",
"metrics.go",
@@ -33,6 +34,7 @@ go_test(
name = "go_default_test",
srcs = [
"blob_test.go",
"cache_test.go",
"pruner_test.go",
],
embed = [":go_default_library"],

View File

@@ -1,6 +1,7 @@
package filesystem
import (
"context"
"fmt"
"os"
"path"
@@ -103,12 +104,29 @@ func (bs *BlobStorage) WarmCache() {
return
}
go func() {
if err := bs.pruner.prune(0); err != nil {
start := time.Now()
if err := bs.pruner.warmCache(); err != nil {
log.WithError(err).Error("Error encountered while warming up blob pruner cache")
}
log.WithField("elapsed", time.Since(start)).Info("Blob filesystem cache warm-up complete.")
}()
}
// ErrBlobStorageSummarizerUnavailable is a sentinel error returned when there is no pruner/cache available.
// This should be used by code that optionally uses the summarizer to optimize rpc requests. Being able to
// fallback when there is no summarizer allows client code to avoid test complexity where the summarizer doesn't matter.
var ErrBlobStorageSummarizerUnavailable = errors.New("BlobStorage not initialized with a pruner or cache")
// WaitForSummarizer blocks until the BlobStorageSummarizer is ready to use.
// BlobStorageSummarizer is not ready immediately on node startup because it needs to sample the blob filesystem to
// determine which blobs are available.
func (bs *BlobStorage) WaitForSummarizer(ctx context.Context) (BlobStorageSummarizer, error) {
if bs.pruner == nil {
return nil, ErrBlobStorageSummarizerUnavailable
}
return bs.pruner.waitForCache(ctx)
}
// Save saves blobs given a list of sidecars.
func (bs *BlobStorage) Save(sidecar blocks.VerifiedROBlob) error {
startTime := time.Now()

View File

@@ -0,0 +1,119 @@
package filesystem
import (
"sync"
fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
)
// blobIndexMask is a bitmask representing the set of blob indices that are currently set.
type blobIndexMask [fieldparams.MaxBlobsPerBlock]bool
// BlobStorageSummary represents cached information about the BlobSidecars on disk for each root the cache knows about.
type BlobStorageSummary struct {
slot primitives.Slot
mask blobIndexMask
}
// HasIndex returns true if the BlobSidecar at the given index is available in the filesystem.
func (s BlobStorageSummary) HasIndex(idx uint64) bool {
// Protect from panic, but assume callers are sophisticated enough to not need an error telling them they have an invalid idx.
if idx >= fieldparams.MaxBlobsPerBlock {
return false
}
return s.mask[idx]
}
// AllAvailable returns true if we have all blobs for all indices from 0 to count-1.
func (s BlobStorageSummary) AllAvailable(count int) bool {
if count > fieldparams.MaxBlobsPerBlock {
return false
}
for i := 0; i < count; i++ {
if !s.mask[i] {
return false
}
}
return true
}
// BlobStorageSummarizer can be used to receive a summary of metadata about blobs on disk for a given root.
// The BlobStorageSummary can be used to check which indices (if any) are available for a given block by root.
type BlobStorageSummarizer interface {
Summary(root [32]byte) BlobStorageSummary
}
type blobStorageCache struct {
mu sync.RWMutex
nBlobs float64
cache map[string]BlobStorageSummary
}
var _ BlobStorageSummarizer = &blobStorageCache{}
func newBlobStorageCache() *blobStorageCache {
return &blobStorageCache{
cache: make(map[string]BlobStorageSummary, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest*fieldparams.SlotsPerEpoch),
}
}
// Summary returns the BlobStorageSummary for `root`. The BlobStorageSummary can be used to check for the presence of
// BlobSidecars based on Index.
func (s *blobStorageCache) Summary(root [32]byte) BlobStorageSummary {
k := rootString(root)
s.mu.RLock()
defer s.mu.RUnlock()
return s.cache[k]
}
func (s *blobStorageCache) ensure(key string, slot primitives.Slot, idx uint64) error {
if idx >= fieldparams.MaxBlobsPerBlock {
return errIndexOutOfBounds
}
s.mu.Lock()
defer s.mu.Unlock()
v := s.cache[key]
v.slot = slot
if !v.mask[idx] {
s.updateMetrics(1)
}
v.mask[idx] = true
s.cache[key] = v
return nil
}
func (s *blobStorageCache) slot(key string) (primitives.Slot, bool) {
s.mu.RLock()
defer s.mu.RUnlock()
v, ok := s.cache[key]
if !ok {
return 0, false
}
return v.slot, ok
}
func (s *blobStorageCache) evict(key string) {
var deleted float64
s.mu.Lock()
v, ok := s.cache[key]
if ok {
for i := range v.mask {
if v.mask[i] {
deleted += 1
}
}
}
delete(s.cache, key)
s.mu.Unlock()
if deleted > 0 {
s.updateMetrics(-deleted)
}
}
func (s *blobStorageCache) updateMetrics(delta float64) {
s.nBlobs += delta
blobDiskCount.Set(s.nBlobs)
blobDiskSize.Set(s.nBlobs * bytesPerSidecar)
}

View File

@@ -0,0 +1,150 @@
package filesystem
import (
"testing"
fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
"github.com/prysmaticlabs/prysm/v5/testing/require"
)
func TestSlotByRoot_Summary(t *testing.T) {
var noneSet, allSet, firstSet, lastSet, oneSet blobIndexMask
firstSet[0] = true
lastSet[len(lastSet)-1] = true
oneSet[1] = true
for i := range allSet {
allSet[i] = true
}
cases := []struct {
name string
root [32]byte
expected *blobIndexMask
}{
{
name: "not found",
},
{
name: "none set",
expected: &noneSet,
},
{
name: "index 1 set",
expected: &oneSet,
},
{
name: "all set",
expected: &allSet,
},
{
name: "first set",
expected: &firstSet,
},
{
name: "last set",
expected: &lastSet,
},
}
sc := newBlobStorageCache()
for _, c := range cases {
if c.expected != nil {
key := rootString(bytesutil.ToBytes32([]byte(c.name)))
sc.cache[key] = BlobStorageSummary{slot: 0, mask: *c.expected}
}
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
key := bytesutil.ToBytes32([]byte(c.name))
sum := sc.Summary(key)
for i := range c.expected {
ui := uint64(i)
if c.expected == nil {
require.Equal(t, false, sum.HasIndex(ui))
} else {
require.Equal(t, c.expected[i], sum.HasIndex(ui))
}
}
})
}
}
func TestAllAvailable(t *testing.T) {
idxUpTo := func(u int) []int {
r := make([]int, u)
for i := range r {
r[i] = i
}
return r
}
require.DeepEqual(t, []int{}, idxUpTo(0))
require.DeepEqual(t, []int{0}, idxUpTo(1))
require.DeepEqual(t, []int{0, 1, 2, 3, 4, 5}, idxUpTo(6))
cases := []struct {
name string
idxSet []int
count int
aa bool
}{
{
// If there are no blobs committed, then all the committed blobs are available.
name: "none in idx, 0 arg",
count: 0,
aa: true,
},
{
name: "none in idx, 1 arg",
count: 1,
aa: false,
},
{
name: "first in idx, 1 arg",
idxSet: []int{0},
count: 1,
aa: true,
},
{
name: "second in idx, 1 arg",
idxSet: []int{1},
count: 1,
aa: false,
},
{
name: "first missing, 2 arg",
idxSet: []int{1},
count: 2,
aa: false,
},
{
name: "all missing, 1 arg",
count: 6,
aa: false,
},
{
name: "out of bound is safe",
count: fieldparams.MaxBlobsPerBlock + 1,
aa: false,
},
{
name: "max present",
count: fieldparams.MaxBlobsPerBlock,
idxSet: idxUpTo(fieldparams.MaxBlobsPerBlock),
aa: true,
},
{
name: "one present",
count: 1,
idxSet: idxUpTo(1),
aa: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
var mask blobIndexMask
for _, idx := range c.idxSet {
mask[idx] = true
}
sum := BlobStorageSummary{mask: mask}
require.Equal(t, c.aa, sum.AllAvailable(c.count))
})
}
}

View File

@@ -1,6 +1,7 @@
package filesystem
import (
"context"
"encoding/binary"
"io"
"path"
@@ -12,7 +13,6 @@ import (
"time"
"github.com/pkg/errors"
fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/time/slots"
@@ -32,7 +32,8 @@ type blobPruner struct {
sync.Mutex
prunedBefore atomic.Uint64
windowSize primitives.Slot
slotMap *slotForRoot
cache *blobStorageCache
cacheWarmed chan struct{}
fs afero.Fs
}
@@ -41,13 +42,14 @@ func newBlobPruner(fs afero.Fs, retain primitives.Epoch) (*blobPruner, error) {
if err != nil {
return nil, errors.Wrap(err, "could not set retentionSlots")
}
return &blobPruner{fs: fs, windowSize: r, slotMap: newSlotForRoot()}, nil
cw := make(chan struct{})
return &blobPruner{fs: fs, windowSize: r, cache: newBlobStorageCache(), cacheWarmed: cw}, nil
}
// notify updates the pruner's view of root->blob mappings. This allows the pruner to build a cache
// of root->slot mappings and decide when to evict old blobs based on the age of present blobs.
func (p *blobPruner) notify(root [32]byte, latest primitives.Slot, idx uint64) error {
if err := p.slotMap.ensure(rootString(root), latest, idx); err != nil {
if err := p.cache.ensure(rootString(root), latest, idx); err != nil {
return err
}
pruned := uint64(windowMin(latest, p.windowSize))
@@ -62,7 +64,7 @@ func (p *blobPruner) notify(root [32]byte, latest primitives.Slot, idx uint64) e
return nil
}
func windowMin(latest primitives.Slot, offset primitives.Slot) primitives.Slot {
func windowMin(latest, offset primitives.Slot) primitives.Slot {
// Safely compute the first slot in the epoch for the latest slot
latest = latest - latest%params.BeaconConfig().SlotsPerEpoch
if latest < offset {
@@ -71,6 +73,23 @@ func windowMin(latest primitives.Slot, offset primitives.Slot) primitives.Slot {
return latest - offset
}
func (p *blobPruner) warmCache() error {
if err := p.prune(0); err != nil {
return err
}
close(p.cacheWarmed)
return nil
}
func (p *blobPruner) waitForCache(ctx context.Context) (*blobStorageCache, error) {
select {
case <-p.cacheWarmed:
return p.cache, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
// Prune prunes blobs in the base directory based on the retention epoch.
// It deletes blobs older than currentEpoch - (retentionEpochs+bufferEpochs).
// This is so that we keep a slight buffer and blobs are deleted after n+2 epochs.
@@ -122,7 +141,7 @@ func shouldRetain(slot, pruneBefore primitives.Slot) bool {
func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int, error) {
root := rootFromDir(dir)
slot, slotCached := p.slotMap.slot(root)
slot, slotCached := p.cache.slot(root)
// Return early if the slot is cached and doesn't need pruning.
if slotCached && shouldRetain(slot, pruneBefore) {
return 0, nil
@@ -151,7 +170,7 @@ func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int,
if err != nil {
return 0, errors.Wrapf(err, "index could not be determined for blob file %s", scFiles[i])
}
if err := p.slotMap.ensure(root, slot, idx); err != nil {
if err := p.cache.ensure(root, slot, idx); err != nil {
return 0, errors.Wrapf(err, "could not update prune cache for blob file %s", scFiles[i])
}
}
@@ -179,7 +198,7 @@ func (p *blobPruner) tryPruneDir(dir string, pruneBefore primitives.Slot) (int,
return removed, errors.Wrapf(err, "unable to remove blob directory %s", dir)
}
p.slotMap.evict(rootFromDir(dir))
p.cache.evict(rootFromDir(dir))
return len(scFiles), nil
}
@@ -269,71 +288,3 @@ func filterSsz(s string) bool {
func filterPart(s string) bool {
return filepath.Ext(s) == dotPartExt
}
func newSlotForRoot() *slotForRoot {
return &slotForRoot{
cache: make(map[string]*slotCacheEntry, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest*fieldparams.SlotsPerEpoch),
}
}
type slotCacheEntry struct {
slot primitives.Slot
mask [fieldparams.MaxBlobsPerBlock]bool
}
type slotForRoot struct {
sync.RWMutex
nBlobs float64
cache map[string]*slotCacheEntry
}
func (s *slotForRoot) updateMetrics(delta float64) {
s.nBlobs += delta
blobDiskCount.Set(s.nBlobs)
blobDiskSize.Set(s.nBlobs * bytesPerSidecar)
}
func (s *slotForRoot) ensure(key string, slot primitives.Slot, idx uint64) error {
if idx >= fieldparams.MaxBlobsPerBlock {
return errIndexOutOfBounds
}
s.Lock()
defer s.Unlock()
v, ok := s.cache[key]
if !ok {
v = &slotCacheEntry{}
}
v.slot = slot
if !v.mask[idx] {
s.updateMetrics(1)
}
v.mask[idx] = true
s.cache[key] = v
return nil
}
func (s *slotForRoot) slot(key string) (primitives.Slot, bool) {
s.RLock()
defer s.RUnlock()
v, ok := s.cache[key]
if !ok {
return 0, false
}
return v.slot, ok
}
func (s *slotForRoot) evict(key string) {
s.Lock()
defer s.Unlock()
v, ok := s.cache[key]
var deleted float64
if ok {
for i := range v.mask {
if v.mask[i] {
deleted += 1
}
}
s.updateMetrics(-deleted)
}
delete(s.cache, key)
}

View File

@@ -28,7 +28,7 @@ func TestTryPruneDir_CachedNotExpired(t *testing.T) {
root := fmt.Sprintf("%#x", sc.BlockRoot())
// This slot is right on the edge of what would need to be pruned, so by adding it to the cache and
// skipping any other test setup, we can be certain the hot cache path never touches the filesystem.
require.NoError(t, pr.slotMap.ensure(root, sc.Slot(), 0))
require.NoError(t, pr.cache.ensure(root, sc.Slot(), 0))
pruned, err := pr.tryPruneDir(root, pr.windowSize)
require.NoError(t, err)
require.Equal(t, 0, pruned)
@@ -45,7 +45,7 @@ func TestTryPruneDir_CachedExpired(t *testing.T) {
require.NoError(t, err)
root := fmt.Sprintf("%#x", sc.BlockRoot())
require.NoError(t, fs.Mkdir(root, directoryPermissions)) // make empty directory
require.NoError(t, pr.slotMap.ensure(root, sc.Slot(), 0))
require.NoError(t, pr.cache.ensure(root, sc.Slot(), 0))
pruned, err := pr.tryPruneDir(root, slot+1)
require.NoError(t, err)
require.Equal(t, 0, pruned)
@@ -63,7 +63,7 @@ func TestTryPruneDir_CachedExpired(t *testing.T) {
// check that the root->slot is cached
root := fmt.Sprintf("%#x", scs[0].BlockRoot())
cs, cok := bs.pruner.slotMap.slot(root)
cs, cok := bs.pruner.cache.slot(root)
require.Equal(t, true, cok)
require.Equal(t, slot, cs)
@@ -95,12 +95,12 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {
// check that the root->slot is cached
root := fmt.Sprintf("%#x", scs[0].BlockRoot())
cs, ok := bs.pruner.slotMap.slot(root)
cs, ok := bs.pruner.cache.slot(root)
require.Equal(t, true, ok)
require.Equal(t, slot, cs)
// evict it from the cache so that we trigger the file read path
bs.pruner.slotMap.evict(root)
_, ok = bs.pruner.slotMap.slot(root)
bs.pruner.cache.evict(root)
_, ok = bs.pruner.cache.slot(root)
require.Equal(t, false, ok)
// ensure that we see the saved files in the filesystem
@@ -119,7 +119,7 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {
fs, bs, err := NewEphemeralBlobStorageWithFs(t)
require.NoError(t, err)
// Set slot equal to the window size, so it should be retained.
var slot primitives.Slot = bs.pruner.windowSize
slot := bs.pruner.windowSize
_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, slot, 2)
scs, err := verification.BlobSidecarSliceNoop(sidecars)
require.NoError(t, err)
@@ -129,8 +129,8 @@ func TestTryPruneDir_SlotFromFile(t *testing.T) {
// Evict slot mapping from the cache so that we trigger the file read path.
root := fmt.Sprintf("%#x", scs[0].BlockRoot())
bs.pruner.slotMap.evict(root)
_, ok := bs.pruner.slotMap.slot(root)
bs.pruner.cache.evict(root)
_, ok := bs.pruner.cache.slot(root)
require.Equal(t, false, ok)
// Ensure that we see the saved files in the filesystem.
@@ -243,10 +243,8 @@ func TestListDir(t *testing.T) {
}
blobWithSszAndTmp := dirFiles{name: "0x1234567890", isDir: true,
children: []dirFiles{{name: "5.ssz"}, {name: "0.part"}}}
fsLayout.children = append(fsLayout.children, notABlob)
fsLayout.children = append(fsLayout.children, childlessBlob)
fsLayout.children = append(fsLayout.children, blobWithSsz)
fsLayout.children = append(fsLayout.children, blobWithSszAndTmp)
fsLayout.children = append(fsLayout.children,
notABlob, childlessBlob, blobWithSsz, blobWithSszAndTmp)
topChildren := make([]string, len(fsLayout.children))
for i := range fsLayout.children {
@@ -282,10 +280,7 @@ func TestListDir(t *testing.T) {
dirPath: ".",
expected: []string{notABlob.name},
filter: func(s string) bool {
if s == notABlob.name {
return true
}
return false
return s == notABlob.name
},
},
{