Organize blobs on disk by epoch (#14023)

* organize blob directories by period and epoch

* changelog

* remove Indices and replace with Summary

* old PR feedback

* log to advise about the speed of blob migration

* rename level->layer (hoping the term is clearer)

* assert path in tests for increased legibility

* lint

* lint

* remove test covering a newly impossible error

* improve feedback from flag validation failure

* Try to clean dangling dirs in epoch->flat migration

* lint

* Preston feedback

* try all layouts and short-circuit if base not found

---------

Co-authored-by: Kasey Kirkham <kasey@users.noreply.github.com>
Authored by kasey on 2025-02-05 14:09:38 -06:00; committed by GitHub
parent 2a7fc84044
commit 41daac1b04
42 changed files with 2137 additions and 1137 deletions
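The test fixtures later in this diff (see TestIterationComplete) pin down the shape of the new layout: blob sidecars move from flat per-root directories to by-epoch/<period>/<epoch>/<block root>/<index>.ssz, where the period appears to be the epoch divided by 4096 (that divisor is inferred from the fixtures, not taken from the source). A minimal sketch of the path construction under that assumption:

package main

import (
	"fmt"
	"path/filepath"
)

// epochsPerPeriod is inferred from the test fixtures in this diff (epoch
// 16777216 lands in period 4096, epoch 5330 in period 1); the real constant
// lives in the layout code and may be named differently.
const epochsPerPeriod = 4096

// byEpochPath sketches the new scheme: by-epoch/<period>/<epoch>/<root>/<index>.ssz.
func byEpochPath(root string, epoch, index uint64) string {
	period := epoch / epochsPerPeriod
	return filepath.Join(
		"by-epoch",
		fmt.Sprintf("%d", period),
		fmt.Sprintf("%d", epoch),
		root,
		fmt.Sprintf("%d.ssz", index),
	)
}

func main() {
	// Reproduces an expected path from TestIterationComplete:
	// by-epoch/1/5330/0x0127dba6.../0.ssz
	fmt.Println(byEpochPath("0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 0))
}

Grouping roots under epoch and period directories presumably keeps any single directory from accumulating an unbounded number of entries, which is what makes pruning and the cache warm-up (noted below as ~4s of syscalls for a cold flat layout) cheaper.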

View File

@@ -512,17 +512,11 @@ func missingIndices(bs *filesystem.BlobStorage, root [32]byte, expected [][]byte
 	if len(expected) > maxBlobsPerBlock {
 		return nil, errMaxBlobsExceeded
 	}
-	indices, err := bs.Indices(root, slot)
-	if err != nil {
-		return nil, err
-	}
+	indices := bs.Summary(root)
 	missing := make(map[uint64]struct{}, len(expected))
 	for i := range expected {
-		ui := uint64(i)
-		if len(expected[i]) > 0 {
-			if !indices[i] {
-				missing[ui] = struct{}{}
-			}
+		if len(expected[i]) > 0 && !indices.HasIndex(uint64(i)) {
+			missing[uint64(i)] = struct{}{}
 		}
 	}
 	return missing, nil
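Summary is infallible where the old Indices call could fail mid-listing, so the caller reduces to a mask check. A self-contained sketch of that pattern, with the types cut down to just what this hunk shows:

package main

import "fmt"

// BlobStorageSummary mirrors the summary type in this diff: a cached bitmask
// of which sidecar indices exist on disk for a single block root.
type BlobStorageSummary struct{ mask []bool }

// HasIndex reports whether the sidecar at idx is present; out-of-range
// indices simply return false instead of surfacing an error.
func (s BlobStorageSummary) HasIndex(idx uint64) bool {
	return idx < uint64(len(s.mask)) && s.mask[idx]
}

// missingIndices restates the rewritten helper above: every expected
// commitment whose index is absent from the summary is still missing.
func missingIndices(sum BlobStorageSummary, expected [][]byte) map[uint64]struct{} {
	missing := make(map[uint64]struct{}, len(expected))
	for i := range expected {
		if len(expected[i]) > 0 && !sum.HasIndex(uint64(i)) {
			missing[uint64(i)] = struct{}{}
		}
	}
	return missing
}

func main() {
	sum := BlobStorageSummary{mask: []bool{true, false, true}}
	expected := [][]byte{{0x01}, {0x02}, {0x03}}
	fmt.Println(missingIndices(sum, expected)) // map[1:{}]
}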

View File

@@ -2297,7 +2297,7 @@ func TestMissingIndices(t *testing.T) {
 	for _, c := range cases {
 		bm, bs := filesystem.NewEphemeralBlobStorageWithMocker(t)
 		t.Run(c.name, func(t *testing.T) {
-			require.NoError(t, bm.CreateFakeIndices(c.root, c.present...))
+			require.NoError(t, bm.CreateFakeIndices(c.root, 0, c.present...))
 			missing, err := missingIndices(bs, c.root, c.expected, 0)
 			if c.err != nil {
 				require.ErrorIs(t, err, c.err)

View File

@@ -94,14 +94,7 @@ func (s *LazilyPersistentStore) IsDataAvailable(ctx context.Context, current pri
 	entry := s.cache.ensure(key)
 	defer s.cache.delete(key)
 	root := b.Root()
-	sumz, err := s.store.WaitForSummarizer(ctx)
-	if err != nil {
-		log.WithField("root", fmt.Sprintf("%#x", b.Root())).
-			WithError(err).
-			Debug("Failed to receive BlobStorageSummarizer within IsDataAvailable")
-	} else {
-		entry.setDiskSummary(sumz.Summary(root))
-	}
+	entry.setDiskSummary(s.store.Summary(root))

 	// Verify we have all the expected sidecars, and fail fast if any are missing or inconsistent.
 	// We don't try to salvage problematic batches because this indicates a misbehaving peer and we'd rather

View File

@@ -5,6 +5,10 @@ go_library(
     srcs = [
         "blob.go",
         "cache.go",
+        "iteration.go",
+        "layout.go",
+        "layout_by_epoch.go",
+        "layout_flat.go",
         "log.go",
         "metrics.go",
         "mock.go",
@@ -13,6 +17,7 @@ go_library(
     importpath = "github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem",
     visibility = ["//visibility:public"],
     deps = [
+        "//beacon-chain/db:go_default_library",
         "//beacon-chain/verification:go_default_library",
         "//config/fieldparams:go_default_library",
         "//config/params:go_default_library",
@@ -20,7 +25,6 @@ go_library(
         "//consensus-types/primitives:go_default_library",
         "//encoding/bytesutil:go_default_library",
         "//io/file:go_default_library",
-        "//proto/prysm/v1alpha1:go_default_library",
         "//runtime/logging:go_default_library",
         "//time/slots:go_default_library",
         "@com_github_ethereum_go_ethereum//common/hexutil:go_default_library",
@@ -37,10 +41,14 @@ go_test(
     srcs = [
         "blob_test.go",
         "cache_test.go",
+        "iteration_test.go",
+        "layout_test.go",
+        "migration_test.go",
         "pruner_test.go",
     ],
     embed = [":go_default_library"],
     deps = [
+        "//beacon-chain/db:go_default_library",
         "//beacon-chain/verification:go_default_library",
         "//config/params:go_default_library",
         "//consensus-types/primitives:go_default_library",
@@ -48,6 +56,7 @@ go_test(
         "//proto/prysm/v1alpha1:go_default_library",
         "//testing/require:go_default_library",
         "//testing/util:go_default_library",
+        "//time/slots:go_default_library",
         "@com_github_prysmaticlabs_fastssz//:go_default_library",
         "@com_github_spf13_afero//:go_default_library",
     ],

View File

@@ -1,42 +1,31 @@
 package filesystem

 import (
-	"context"
 	"fmt"
 	"math"
 	"os"
 	"path"
-	"strconv"
-	"strings"
 	"time"

-	"github.com/ethereum/go-ethereum/common/hexutil"
 	"github.com/pkg/errors"
 	"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
 	"github.com/prysmaticlabs/prysm/v5/config/params"
 	"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
 	"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
-	"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
 	"github.com/prysmaticlabs/prysm/v5/io/file"
-	ethpb "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1"
 	"github.com/prysmaticlabs/prysm/v5/runtime/logging"
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/afero"
 )

+func directoryPermissions() os.FileMode {
+	return params.BeaconIoConfig().ReadWriteExecutePermissions
+}
+
 var (
-	errIndexOutOfBounds    = errors.New("blob index in file name >= DeprecatedMaxBlobsPerBlock")
-	errEmptyBlobWritten    = errors.New("zero bytes written to disk when saving blob sidecar")
+	errIndexOutOfBounds    = errors.New("blob index in file name >= MAX_BLOBS_PER_BLOCK")
 	errSidecarEmptySSZData = errors.New("sidecar marshalled to an empty ssz byte slice")
 	errNoBasePath          = errors.New("BlobStorage base path not specified in init")
-	errInvalidRootString   = errors.New("Could not parse hex string as a [32]byte")
 )
-
-const (
-	sszExt               = "ssz"
-	partExt              = "part"
-	directoryPermissions = 0700
-)

 // BlobStorageOption is a functional option for configuring a BlobStorage.
@@ -66,6 +55,23 @@ func WithSaveFsync(fsync bool) BlobStorageOption {
 	}
 }

+// WithFs allows the afero.Fs implementation to be customized. Used by tests
+// to substitute an in-memory filesystem.
+func WithFs(fs afero.Fs) BlobStorageOption {
+	return func(b *BlobStorage) error {
+		b.fs = fs
+		return nil
+	}
+}
+
+// WithLayout enables the user to specify which layout scheme to use, dictating how blob files are stored on disk.
+func WithLayout(name string) BlobStorageOption {
+	return func(b *BlobStorage) error {
+		b.layoutName = name
+		return nil
+	}
+}
+
 // NewBlobStorage creates a new instance of the BlobStorage object. Note that the implementation of BlobStorage may
 // attempt to hold a file lock to guarantee exclusive control of the blob storage directory, so this should only be
 // initialized once per beacon node.
@@ -76,19 +82,27 @@ func NewBlobStorage(opts ...BlobStorageOption) (*BlobStorage, error) {
 			return nil, errors.Wrap(err, "failed to create blob storage")
 		}
 	}
-	if b.base == "" {
-		return nil, errNoBasePath
+	// Allow tests to set up a different fs using WithFs.
+	if b.fs == nil {
+		if b.base == "" {
+			return nil, errNoBasePath
+		}
+		b.base = path.Clean(b.base)
+		if err := file.MkdirAll(b.base); err != nil {
+			return nil, errors.Wrapf(err, "failed to create blob storage at %s", b.base)
+		}
+		b.fs = afero.NewBasePathFs(afero.NewOsFs(), b.base)
 	}
-	b.base = path.Clean(b.base)
-	if err := file.MkdirAll(b.base); err != nil {
-		return nil, errors.Wrapf(err, "failed to create blob storage at %s", b.base)
+	b.cache = newBlobStorageCache()
+	pruner := newBlobPruner(b.retentionEpochs)
+	if b.layoutName == "" {
+		b.layoutName = LayoutNameFlat
 	}
-	b.fs = afero.NewBasePathFs(afero.NewOsFs(), b.base)
-	pruner, err := newBlobPruner(b.fs, b.retentionEpochs)
+	layout, err := newLayout(b.layoutName, b.fs, b.cache, pruner)
 	if err != nil {
 		return nil, err
 	}
-	b.pruner = pruner
+	b.layout = layout
 	return b, nil
 }
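Based only on the options visible in this hunk (WithFs, WithLayout, and the nil-fs branch above), constructing an in-memory store with the new layout would look roughly like this; the import path comes from the BUILD file earlier in the diff:

package main

import (
	"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
	"github.com/spf13/afero"
)

func main() {
	// WithFs supplies a ready-made afero.Fs, so the base-path checks and
	// MkdirAll in NewBlobStorage are skipped; WithLayout picks "by-epoch"
	// instead of the default flat layout.
	bs, err := filesystem.NewBlobStorage(
		filesystem.WithFs(afero.NewMemMapFs()),
		filesystem.WithLayout(filesystem.LayoutNameByEpoch),
	)
	if err != nil {
		panic(err)
	}
	_ = bs // Save/Get/Summary now resolve paths through the chosen layout.
}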
@@ -96,47 +110,103 @@ func NewBlobStorage(opts ...BlobStorageOption) (*BlobStorage, error) {
 type BlobStorage struct {
 	base            string
 	retentionEpochs primitives.Epoch
+	layoutName      string
 	fsync           bool
 	fs              afero.Fs
-	pruner          *blobPruner
+	layout          fsLayout
+	cache           *blobStorageSummaryCache
 }

 // WarmCache runs the prune routine with an expiration of slot of 0, so nothing will be pruned, but the pruner's cache
 // will be populated at node startup, avoiding a costly cold prune (~4s in syscalls) during syncing.
 func (bs *BlobStorage) WarmCache() {
-	if bs.pruner == nil {
-		return
-	}
-	go func() {
-		start := time.Now()
+	start := time.Now()
+	if bs.layoutName == LayoutNameFlat {
 		log.Info("Blob filesystem cache warm-up started. This may take a few minutes.")
-		if err := bs.pruner.warmCache(); err != nil {
-			log.WithError(err).Error("Error encountered while warming up blob pruner cache")
-		}
-		log.WithField("elapsed", time.Since(start)).Info("Blob filesystem cache warm-up complete")
-	}()
+	} else {
+		log.Info("Blob filesystem cache warm-up started.")
+	}
+
+	if err := warmCache(bs.layout, bs.cache); err != nil {
+		log.WithError(err).Error("Error encountered while warming up blob filesystem cache.")
+	}
+	if err := bs.migrateLayouts(); err != nil {
+		log.WithError(err).Error("Error encountered while migrating blob storage.")
+	}
+	log.WithField("elapsed", time.Since(start)).Info("Blob filesystem cache warm-up complete.")
 }

-// ErrBlobStorageSummarizerUnavailable is a sentinel error returned when there is no pruner/cache available.
-// This should be used by code that optionally uses the summarizer to optimize rpc requests. Being able to
-// fallback when there is no summarizer allows client code to avoid test complexity where the summarizer doesn't matter.
-var ErrBlobStorageSummarizerUnavailable = errors.New("BlobStorage not initialized with a pruner or cache")
-
-// WaitForSummarizer blocks until the BlobStorageSummarizer is ready to use.
-// BlobStorageSummarizer is not ready immediately on node startup because it needs to sample the blob filesystem to
-// determine which blobs are available.
-func (bs *BlobStorage) WaitForSummarizer(ctx context.Context) (BlobStorageSummarizer, error) {
-	if bs == nil || bs.pruner == nil {
-		return nil, ErrBlobStorageSummarizerUnavailable
-	}
-	return bs.pruner.waitForCache(ctx)
+// If any blob storage directories are found for layouts besides the configured layout, migrate them.
+func (bs *BlobStorage) migrateLayouts() error {
+	for _, name := range LayoutNames {
+		if name == bs.layoutName {
+			continue
+		}
+		from, err := newLayout(name, bs.fs, bs.cache, nil)
+		if err != nil {
+			return err
+		}
+		if err := migrateLayout(bs.fs, from, bs.layout, bs.cache); err != nil {
+			if errors.Is(err, errLayoutNotDetected) {
+				continue
+			}
+			return errors.Wrapf(err, "failed to migrate layout from %s to %s", name, bs.layoutName)
+		}
+	}
+	return nil
+}
+
+func (bs *BlobStorage) writePart(sidecar blocks.VerifiedROBlob) (ppath string, err error) {
+	ident := identForSidecar(sidecar)
+	sidecarData, err := sidecar.MarshalSSZ()
+	if err != nil {
+		return "", errors.Wrap(err, "failed to serialize sidecar data")
+	}
+	if len(sidecarData) == 0 {
+		return "", errSidecarEmptySSZData
+	}
+	if err := bs.fs.MkdirAll(bs.layout.dir(ident), directoryPermissions()); err != nil {
+		return "", err
+	}
+	ppath = bs.layout.partPath(ident, fmt.Sprintf("%p", sidecarData))
+	// Create a partial file and write the serialized data to it.
+	partialFile, err := bs.fs.Create(ppath)
+	if err != nil {
+		return "", errors.Wrap(err, "failed to create partial file")
+	}
+	defer func() {
+		cerr := partialFile.Close()
+		// The close error is probably less important than any existing error, so only overwrite nil err.
+		if cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+	n, err := partialFile.Write(sidecarData)
+	if err != nil {
+		return ppath, errors.Wrap(err, "failed to write to partial file")
+	}
+	if bs.fsync {
+		if err := partialFile.Sync(); err != nil {
+			return ppath, err
+		}
+	}
+	if n != len(sidecarData) {
+		return ppath, fmt.Errorf("failed to write the full bytes of sidecarData, wrote only %d of %d bytes", n, len(sidecarData))
+	}
+	return ppath, nil
 }

 // Save saves blobs given a list of sidecars.
 func (bs *BlobStorage) Save(sidecar blocks.VerifiedROBlob) error {
 	startTime := time.Now()
-	fname := namerForSidecar(sidecar)
-	sszPath := fname.path()
+	ident := identForSidecar(sidecar)
+	sszPath := bs.layout.sszPath(ident)
 	exists, err := afero.Exists(bs.fs, sszPath)
 	if err != nil {
 		return err
@@ -145,78 +215,36 @@ func (bs *BlobStorage) Save(sidecar blocks.VerifiedROBlob) error {
 		log.WithFields(logging.BlobFields(sidecar.ROBlob)).Debug("Ignoring a duplicate blob sidecar save attempt")
 		return nil
 	}
-	if bs.pruner != nil {
-		if err := bs.pruner.notify(sidecar.BlockRoot(), sidecar.Slot(), sidecar.Index); err != nil {
-			return errors.Wrapf(err, "problem maintaining pruning cache/metrics for sidecar with root=%#x", sidecar.BlockRoot())
-		}
-	}
-	// Serialize the ethpb.BlobSidecar to binary data using SSZ.
-	sidecarData, err := sidecar.MarshalSSZ()
-	if err != nil {
-		return errors.Wrap(err, "failed to serialize sidecar data")
-	} else if len(sidecarData) == 0 {
-		return errSidecarEmptySSZData
-	}
-	if err := bs.fs.MkdirAll(fname.dir(), directoryPermissions); err != nil {
-		return err
-	}
-	partPath := fname.partPath(fmt.Sprintf("%p", sidecarData))
-
 	partialMoved := false
+	partPath, err := bs.writePart(sidecar)
 	// Ensure the partial file is deleted.
 	defer func() {
-		if partialMoved {
+		if partialMoved || partPath == "" {
 			return
 		}
 		// It's expected to error if the save is successful.
-		err = bs.fs.Remove(partPath)
+		err := bs.fs.Remove(partPath)
 		if err == nil {
 			log.WithFields(logrus.Fields{
 				"partPath": partPath,
 			}).Debugf("Removed partial file")
 		}
 	}()
-	// Create a partial file and write the serialized data to it.
-	partialFile, err := bs.fs.Create(partPath)
 	if err != nil {
-		return errors.Wrap(err, "failed to create partial file")
-	}
-	n, err := partialFile.Write(sidecarData)
-	if err != nil {
-		closeErr := partialFile.Close()
-		if closeErr != nil {
-			return closeErr
-		}
-		return errors.Wrap(err, "failed to write to partial file")
-	}
-	if bs.fsync {
-		if err := partialFile.Sync(); err != nil {
-			return err
-		}
-	}
-	if err := partialFile.Close(); err != nil {
 		return err
 	}
-	if n != len(sidecarData) {
-		return fmt.Errorf("failed to write the full bytes of sidecarData, wrote only %d of %d bytes", n, len(sidecarData))
-	}
-	if n == 0 {
-		return errEmptyBlobWritten
-	}
+
 	// Atomically rename the partial file to its final name.
 	err = bs.fs.Rename(partPath, sszPath)
 	if err != nil {
 		return errors.Wrap(err, "failed to rename partial file to final name")
 	}
 	partialMoved = true
+	if err := bs.layout.notify(ident); err != nil {
+		return errors.Wrapf(err, "problem maintaining pruning cache/metrics for sidecar with root=%#x", sidecar.BlockRoot())
+	}

 	blobsWrittenCounter.Inc()
 	blobSaveLatency.Observe(float64(time.Since(startTime).Milliseconds()))
@@ -228,70 +256,30 @@ func (bs *BlobStorage) Save(sidecar blocks.VerifiedROBlob) error {
 // value is always a VerifiedROBlob.
 func (bs *BlobStorage) Get(root [32]byte, idx uint64) (blocks.VerifiedROBlob, error) {
 	startTime := time.Now()
-	expected := blobNamer{root: root, index: idx}
-	encoded, err := afero.ReadFile(bs.fs, expected.path())
-	var v blocks.VerifiedROBlob
+	ident, err := bs.layout.ident(root, idx)
 	if err != nil {
-		return v, err
-	}
-	s := &ethpb.BlobSidecar{}
-	if err := s.UnmarshalSSZ(encoded); err != nil {
-		return v, err
-	}
-	ro, err := blocks.NewROBlobWithRoot(s, root)
-	if err != nil {
-		return blocks.VerifiedROBlob{}, err
+		return verification.VerifiedROBlobError(err)
 	}
 	defer func() {
 		blobFetchLatency.Observe(float64(time.Since(startTime).Milliseconds()))
 	}()
-	return verification.BlobSidecarNoop(ro)
+	return verification.VerifiedROBlobFromDisk(bs.fs, root, bs.layout.sszPath(ident))
 }

 // Remove removes all blobs for a given root.
 func (bs *BlobStorage) Remove(root [32]byte) error {
-	rootDir := blobNamer{root: root}.dir()
-	return bs.fs.RemoveAll(rootDir)
+	dirIdent, err := bs.layout.dirIdent(root)
+	if err != nil {
+		return err
+	}
+	_, err = bs.layout.remove(dirIdent)
+	return err
 }

-// Indices generates a bitmap representing which BlobSidecar.Index values are present on disk for a given root.
-// This value can be compared to the commitments observed in a block to determine which indices need to be found
-// on the network to confirm data availability.
-func (bs *BlobStorage) Indices(root [32]byte, s primitives.Slot) ([]bool, error) {
-	maxBlobsPerBlock := params.BeaconConfig().MaxBlobsPerBlock(s)
-	mask := make([]bool, maxBlobsPerBlock)
-	rootDir := blobNamer{root: root}.dir()
-	entries, err := afero.ReadDir(bs.fs, rootDir)
-	if err != nil {
-		if os.IsNotExist(err) {
-			return mask, nil
-		}
-		return mask, err
-	}
-	for i := range entries {
-		if entries[i].IsDir() {
-			continue
-		}
-		name := entries[i].Name()
-		if !strings.HasSuffix(name, sszExt) {
-			continue
-		}
-		parts := strings.Split(name, ".")
-		if len(parts) != 2 {
-			continue
-		}
-		u, err := strconv.ParseUint(parts[0], 10, 64)
-		if err != nil {
-			return mask, errors.Wrapf(err, "unexpected directory entry breaks listing, %s", parts[0])
-		}
-		if u >= uint64(maxBlobsPerBlock) {
-			return mask, errIndexOutOfBounds
-		}
-		mask[u] = true
-	}
-	return mask, nil
+// Summary returns the BlobStorageSummary from the layout.
+// Internally, this is a cached representation of the directory listing for the given root.
+func (bs *BlobStorage) Summary(root [32]byte) BlobStorageSummary {
+	return bs.layout.summary(root)
 }

 // Clear deletes all files on the filesystem.
@@ -316,36 +304,3 @@ func (bs *BlobStorage) WithinRetentionPeriod(requested, current primitives.Epoch
 	}
 	return requested+bs.retentionEpochs >= current
 }
-
-type blobNamer struct {
-	root  [32]byte
-	index uint64
-}
-
-func namerForSidecar(sc blocks.VerifiedROBlob) blobNamer {
-	return blobNamer{root: sc.BlockRoot(), index: sc.Index}
-}
-
-func (p blobNamer) dir() string {
-	return rootString(p.root)
-}
-
-func (p blobNamer) partPath(entropy string) string {
-	return path.Join(p.dir(), fmt.Sprintf("%s-%d.%s", entropy, p.index, partExt))
-}
-
-func (p blobNamer) path() string {
-	return path.Join(p.dir(), fmt.Sprintf("%d.%s", p.index, sszExt))
-}
-
-func rootString(root [32]byte) string {
-	return fmt.Sprintf("%#x", root)
-}
-
-func stringToRoot(str string) ([32]byte, error) {
-	slice, err := hexutil.Decode(str)
-	if err != nil {
-		return [32]byte{}, errors.Wrapf(errInvalidRootString, "input=%s", str)
-	}
-	return bytesutil.ToBytes32(slice), nil
-}
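The Save path above keeps the part-file-then-rename discipline, now factored into writePart. The essence of that pattern, sketched with afero directly (file names here are illustrative):

package main

import (
	"fmt"

	"github.com/spf13/afero"
)

// atomicWrite sketches the part-file pattern used by Save/writePart above:
// write to a temporary ".part" file first, then rename it into place so a
// reader never observes a partially written blob.
func atomicWrite(fs afero.Fs, finalPath string, data []byte) error {
	partPath := finalPath + ".part"
	if err := afero.WriteFile(fs, partPath, data, 0o600); err != nil {
		return err
	}
	// Rename is atomic on POSIX filesystems, so the blob appears all at once.
	return fs.Rename(partPath, finalPath)
}

func main() {
	fs := afero.NewMemMapFs()
	if err := atomicWrite(fs, "0.ssz", []byte("blob bytes")); err != nil {
		panic(err)
	}
	b, _ := afero.ReadFile(fs, "0.ssz")
	fmt.Println(string(b))
}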

View File

@@ -9,26 +9,26 @@ import (
"testing" "testing"
ssz "github.com/prysmaticlabs/fastssz" ssz "github.com/prysmaticlabs/fastssz"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/db"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification" "github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
"github.com/prysmaticlabs/prysm/v5/config/params" "github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives" "github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
ethpb "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1" ethpb "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1"
"github.com/prysmaticlabs/prysm/v5/testing/require" "github.com/prysmaticlabs/prysm/v5/testing/require"
"github.com/prysmaticlabs/prysm/v5/testing/util" "github.com/prysmaticlabs/prysm/v5/testing/util"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/spf13/afero" "github.com/spf13/afero"
) )
func TestBlobStorage_SaveBlobData(t *testing.T) { func TestBlobStorage_SaveBlobData(t *testing.T) {
_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 1, params.BeaconConfig().MaxBlobsPerBlock(1)) _, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 1, params.BeaconConfig().MaxBlobsPerBlock(1))
testSidecars, err := verification.BlobSidecarSliceNoop(sidecars) testSidecars := verification.FakeVerifySliceForTest(t, sidecars)
require.NoError(t, err)
t.Run("no error for duplicate", func(t *testing.T) { t.Run("no error for duplicate", func(t *testing.T) {
fs, bs := NewEphemeralBlobStorageWithFs(t) fs, bs := NewEphemeralBlobStorageAndFs(t)
existingSidecar := testSidecars[0] existingSidecar := testSidecars[0]
blobPath := namerForSidecar(existingSidecar).path() blobPath := bs.layout.sszPath(identForSidecar(existingSidecar))
// Serialize the existing BlobSidecar to binary data. // Serialize the existing BlobSidecar to binary data.
existingSidecarData, err := ssz.MarshalSSZ(existingSidecar) existingSidecarData, err := ssz.MarshalSSZ(existingSidecar)
require.NoError(t, err) require.NoError(t, err)
@@ -56,8 +56,8 @@ func TestBlobStorage_SaveBlobData(t *testing.T) {
 		require.NoError(t, bs.Save(sc))
 		actualSc, err := bs.Get(sc.BlockRoot(), sc.Index)
 		require.NoError(t, err)
-		expectedIdx := []bool{false, false, true, false, false, false}
-		actualIdx, err := bs.Indices(actualSc.BlockRoot(), 100)
+		expectedIdx := blobIndexMask{false, false, true, false, false, false}
+		actualIdx := bs.Summary(actualSc.BlockRoot()).mask
 		require.NoError(t, err)
 		require.DeepEqual(t, expectedIdx, actualIdx)
 	})
@@ -85,7 +85,7 @@ func TestBlobStorage_SaveBlobData(t *testing.T) {
 		require.NoError(t, bs.Remove(expected.BlockRoot()))
 		_, err = bs.Get(expected.BlockRoot(), expected.Index)
-		require.ErrorContains(t, "file does not exist", err)
+		require.Equal(t, true, db.IsNotFound(err))
 	})

 	t.Run("clear", func(t *testing.T) {
@@ -126,16 +126,14 @@ func TestBlobStorage_SaveBlobData(t *testing.T) {
 	})
 }

-// pollUntil polls a condition function until it returns true or a timeout is reached.
 func TestBlobIndicesBounds(t *testing.T) {
-	fs, bs := NewEphemeralBlobStorageWithFs(t)
+	fs := afero.NewMemMapFs()
 	root := [32]byte{}

 	okIdx := uint64(params.BeaconConfig().MaxBlobsPerBlock(0)) - 1
-	writeFakeSSZ(t, fs, root, okIdx)
-	indices, err := bs.Indices(root, 100)
-	require.NoError(t, err)
+	writeFakeSSZ(t, fs, root, 0, okIdx)
+	bs := NewWarmedEphemeralBlobStorageUsingFs(t, fs, WithLayout(LayoutNameByEpoch))
+	indices := bs.Summary(root).mask
 	expected := make([]bool, params.BeaconConfig().MaxBlobsPerBlock(0))
 	expected[okIdx] = true
 	for i := range expected {
@@ -143,102 +141,23 @@ func TestBlobIndicesBounds(t *testing.T) {
 	}

 	oobIdx := uint64(params.BeaconConfig().MaxBlobsPerBlock(0))
-	writeFakeSSZ(t, fs, root, oobIdx)
-	_, err = bs.Indices(root, 100)
-	require.ErrorIs(t, err, errIndexOutOfBounds)
+	writeFakeSSZ(t, fs, root, 0, oobIdx)
+	// This now fails at cache warmup time.
+	require.ErrorIs(t, warmCache(bs.layout, bs.cache), errIndexOutOfBounds)
 }

-func writeFakeSSZ(t *testing.T, fs afero.Fs, root [32]byte, idx uint64) {
-	namer := blobNamer{root: root, index: idx}
-	require.NoError(t, fs.MkdirAll(namer.dir(), 0700))
-	fh, err := fs.Create(namer.path())
+func writeFakeSSZ(t *testing.T, fs afero.Fs, root [32]byte, slot primitives.Slot, idx uint64) {
+	epoch := slots.ToEpoch(slot)
+	namer := newBlobIdent(root, epoch, idx)
+	layout := periodicEpochLayout{}
+	require.NoError(t, fs.MkdirAll(layout.dir(namer), 0700))
+	fh, err := fs.Create(layout.sszPath(namer))
 	require.NoError(t, err)
 	_, err = fh.Write([]byte("derp"))
 	require.NoError(t, err)
 	require.NoError(t, fh.Close())
 }

-func TestBlobStoragePrune(t *testing.T) {
-	currentSlot := primitives.Slot(200000)
-	fs, bs := NewEphemeralBlobStorageWithFs(t)
-
-	t.Run("PruneOne", func(t *testing.T) {
-		_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 300, params.BeaconConfig().MaxBlobsPerBlock(0))
-		testSidecars, err := verification.BlobSidecarSliceNoop(sidecars)
-		require.NoError(t, err)
-		for _, sidecar := range testSidecars {
-			require.NoError(t, bs.Save(sidecar))
-		}
-		require.NoError(t, bs.pruner.prune(currentSlot-bs.pruner.windowSize))
-		remainingFolders, err := afero.ReadDir(fs, ".")
-		require.NoError(t, err)
-		require.Equal(t, 0, len(remainingFolders))
-	})
-	t.Run("Prune dangling blob", func(t *testing.T) {
-		_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 299, params.BeaconConfig().MaxBlobsPerBlock(0))
-		testSidecars, err := verification.BlobSidecarSliceNoop(sidecars)
-		require.NoError(t, err)
-		for _, sidecar := range testSidecars[4:] {
-			require.NoError(t, bs.Save(sidecar))
-		}
-		require.NoError(t, bs.pruner.prune(currentSlot-bs.pruner.windowSize))
-		remainingFolders, err := afero.ReadDir(fs, ".")
-		require.NoError(t, err)
-		require.Equal(t, 0, len(remainingFolders))
-	})
-	t.Run("PruneMany", func(t *testing.T) {
-		blockQty := 10
-		slot := primitives.Slot(1)
-
-		for j := 0; j <= blockQty; j++ {
-			root := bytesutil.ToBytes32(bytesutil.ToBytes(uint64(slot), 32))
-			_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, root, slot, params.BeaconConfig().MaxBlobsPerBlock(0))
-			testSidecars, err := verification.BlobSidecarSliceNoop(sidecars)
-			require.NoError(t, err)
-			require.NoError(t, bs.Save(testSidecars[0]))
-			slot += 10000
-		}
-
-		require.NoError(t, bs.pruner.prune(currentSlot-bs.pruner.windowSize))
-		remainingFolders, err := afero.ReadDir(fs, ".")
-		require.NoError(t, err)
-		require.Equal(t, 4, len(remainingFolders))
-	})
-}
-
-func BenchmarkPruning(b *testing.B) {
-	var t *testing.T
-	_, bs := NewEphemeralBlobStorageWithFs(t)
-
-	blockQty := 10000
-	currentSlot := primitives.Slot(150000)
-	slot := primitives.Slot(0)
-
-	for j := 0; j <= blockQty; j++ {
-		root := bytesutil.ToBytes32(bytesutil.ToBytes(uint64(slot), 32))
-		_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, root, slot, params.BeaconConfig().MaxBlobsPerBlock(0))
-		testSidecars, err := verification.BlobSidecarSliceNoop(sidecars)
-		require.NoError(t, err)
-		require.NoError(t, bs.Save(testSidecars[0]))
-		slot += 100
-	}
-
-	b.ReportAllocs()
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		err := bs.pruner.prune(currentSlot)
-		require.NoError(b, err)
-	}
-}
-
 func TestNewBlobStorage(t *testing.T) {
 	_, err := NewBlobStorage()
 	require.ErrorIs(t, err, errNoBasePath)
@@ -292,3 +211,13 @@ func TestConfig_WithinRetentionPeriod(t *testing.T) {
 		require.Equal(t, true, storage.WithinRetentionPeriod(1, 1))
 	})
 }
+
+func TestLayoutNames(t *testing.T) {
+	badLayoutName := "bad"
+	for _, name := range LayoutNames {
+		_, err := newLayout(name, nil, nil, nil)
+		require.NoError(t, err)
+	}
+	_, err := newLayout(badLayoutName, nil, nil, nil)
+	require.ErrorIs(t, err, errInvalidLayoutName)
+}

View File

@@ -1,8 +1,10 @@
 package filesystem

 import (
+	"fmt"
 	"sync"

+	"github.com/prysmaticlabs/prysm/v5/beacon-chain/db"
 	fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
 	"github.com/prysmaticlabs/prysm/v5/config/params"
 	"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
@@ -13,17 +15,12 @@ type blobIndexMask []bool
 // BlobStorageSummary represents cached information about the BlobSidecars on disk for each root the cache knows about.
 type BlobStorageSummary struct {
-	slot primitives.Slot
+	epoch primitives.Epoch
 	mask blobIndexMask
 }

 // HasIndex returns true if the BlobSidecar at the given index is available in the filesystem.
 func (s BlobStorageSummary) HasIndex(idx uint64) bool {
-	// Protect from panic, but assume callers are sophisticated enough to not need an error telling them they have an invalid idx.
-	maxBlobsPerBlock := params.BeaconConfig().MaxBlobsPerBlock(s.slot)
-	if idx >= uint64(maxBlobsPerBlock) {
-		return false
-	}
 	if idx >= uint64(len(s.mask)) {
 		return false
 	}
@@ -32,10 +29,6 @@ func (s BlobStorageSummary) HasIndex(idx uint64) bool {
 // AllAvailable returns true if we have all blobs for all indices from 0 to count-1.
 func (s BlobStorageSummary) AllAvailable(count int) bool {
-	maxBlobsPerBlock := params.BeaconConfig().MaxBlobsPerBlock(s.slot)
-	if count > maxBlobsPerBlock {
-		return false
-	}
 	if count > len(s.mask) {
 		return false
 	}
@@ -47,83 +40,121 @@ func (s BlobStorageSummary) AllAvailable(count int) bool {
 	return true
 }

+func (s BlobStorageSummary) MaxBlobsForEpoch() uint64 {
+	return uint64(params.BeaconConfig().MaxBlobsPerBlockAtEpoch(s.epoch))
+}
+
+// NewBlobStorageSummary creates a new BlobStorageSummary for a given epoch and mask.
+func NewBlobStorageSummary(epoch primitives.Epoch, mask []bool) (BlobStorageSummary, error) {
+	c := params.BeaconConfig().MaxBlobsPerBlockAtEpoch(epoch)
+	if len(mask) != c {
+		return BlobStorageSummary{}, fmt.Errorf("mask length %d does not match expected %d for epoch %d", len(mask), c, epoch)
+	}
+	return BlobStorageSummary{
+		epoch: epoch,
+		mask:  mask,
+	}, nil
+}
+
 // BlobStorageSummarizer can be used to receive a summary of metadata about blobs on disk for a given root.
 // The BlobStorageSummary can be used to check which indices (if any) are available for a given block by root.
 type BlobStorageSummarizer interface {
 	Summary(root [32]byte) BlobStorageSummary
 }

-type blobStorageCache struct {
+type blobStorageSummaryCache struct {
 	mu     sync.RWMutex
 	nBlobs float64
 	cache  map[[32]byte]BlobStorageSummary
 }

-var _ BlobStorageSummarizer = &blobStorageCache{}
+var _ BlobStorageSummarizer = &blobStorageSummaryCache{}

-func newBlobStorageCache() *blobStorageCache {
-	return &blobStorageCache{
-		cache: make(map[[32]byte]BlobStorageSummary, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest*fieldparams.SlotsPerEpoch),
+func newBlobStorageCache() *blobStorageSummaryCache {
+	return &blobStorageSummaryCache{
+		cache: make(map[[32]byte]BlobStorageSummary),
 	}
 }

 // Summary returns the BlobStorageSummary for `root`. The BlobStorageSummary can be used to check for the presence of
 // BlobSidecars based on Index.
-func (s *blobStorageCache) Summary(root [32]byte) BlobStorageSummary {
+func (s *blobStorageSummaryCache) Summary(root [32]byte) BlobStorageSummary {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 	return s.cache[root]
 }

-func (s *blobStorageCache) ensure(key [32]byte, slot primitives.Slot, idx uint64) error {
-	maxBlobsPerBlock := params.BeaconConfig().MaxBlobsPerBlock(slot)
-	if idx >= uint64(maxBlobsPerBlock) {
+func (s *blobStorageSummaryCache) ensure(ident blobIdent) error {
+	maxBlobsPerBlock := params.BeaconConfig().MaxBlobsPerBlockAtEpoch(ident.epoch)
+	if ident.index >= uint64(maxBlobsPerBlock) {
 		return errIndexOutOfBounds
 	}
 	s.mu.Lock()
 	defer s.mu.Unlock()
-	v := s.cache[key]
-	v.slot = slot
+	v := s.cache[ident.root]
+	v.epoch = ident.epoch
 	if v.mask == nil {
 		v.mask = make(blobIndexMask, maxBlobsPerBlock)
 	}
-	if !v.mask[idx] {
+	if !v.mask[ident.index] {
 		s.updateMetrics(1)
 	}
-	v.mask[idx] = true
-	s.cache[key] = v
+	v.mask[ident.index] = true
+	s.cache[ident.root] = v
 	return nil
 }

-func (s *blobStorageCache) slot(key [32]byte) (primitives.Slot, bool) {
+func (s *blobStorageSummaryCache) get(key [32]byte) (BlobStorageSummary, bool) {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
 	v, ok := s.cache[key]
-	if !ok {
-		return 0, false
-	}
-	return v.slot, ok
+	return v, ok
 }

-func (s *blobStorageCache) evict(key [32]byte) {
-	var deleted float64
+func (s *blobStorageSummaryCache) identForIdx(key [32]byte, idx uint64) (blobIdent, error) {
+	v, ok := s.get(key)
+	if !ok || !v.HasIndex(idx) {
+		return blobIdent{}, db.ErrNotFound
+	}
+	return blobIdent{
+		root:  key,
+		index: idx,
+		epoch: v.epoch,
+	}, nil
+}
+
+func (s *blobStorageSummaryCache) identForRoot(key [32]byte) (blobIdent, error) {
+	v, ok := s.get(key)
+	if !ok {
+		return blobIdent{}, db.ErrNotFound
+	}
+	return blobIdent{
+		root:  key,
+		epoch: v.epoch,
+	}, nil
+}
+
+func (s *blobStorageSummaryCache) evict(key [32]byte) int {
+	deleted := 0
 	s.mu.Lock()
+	defer s.mu.Unlock()
 	v, ok := s.cache[key]
-	if ok {
-		for i := range v.mask {
-			if v.mask[i] {
-				deleted += 1
-			}
+	if !ok {
+		return 0
+	}
+	for i := range v.mask {
+		if v.mask[i] {
+			deleted += 1
 		}
 	}
 	delete(s.cache, key)
-	s.mu.Unlock()
 	if deleted > 0 {
-		s.updateMetrics(-deleted)
+		s.updateMetrics(-float64(deleted))
 	}
+	return deleted
 }

-func (s *blobStorageCache) updateMetrics(delta float64) {
+func (s *blobStorageSummaryCache) updateMetrics(delta float64) {
 	s.nBlobs += delta
 	blobDiskCount.Set(s.nBlobs)
 	blobDiskSize.Set(s.nBlobs * fieldparams.BlobSidecarSize)
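The renamed blobStorageSummaryCache keeps the same bookkeeping idea: a lock-guarded map of per-root masks whose running count feeds the disk metrics, with evict now reporting how many blobs it dropped so callers can aggregate removals. A stripped-down sketch of that accounting (not the actual type):

package main

import (
	"fmt"
	"sync"
)

// summaryCache sketches the pattern from the diff: a mutex-guarded map of
// per-root bitmasks, where ensure/evict keep a running blob count.
type summaryCache struct {
	mu     sync.RWMutex
	nBlobs int
	cache  map[[32]byte][]bool
}

func (s *summaryCache) ensure(root [32]byte, idx uint64, max int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	mask := s.cache[root]
	if mask == nil {
		mask = make([]bool, max)
	}
	if !mask[idx] {
		s.nBlobs++ // only count a blob the first time its index is seen
	}
	mask[idx] = true
	s.cache[root] = mask
}

// evict drops a root and returns how many blobs it held, mirroring the new
// int return value in the diff above.
func (s *summaryCache) evict(root [32]byte) int {
	s.mu.Lock()
	defer s.mu.Unlock()
	deleted := 0
	for _, set := range s.cache[root] {
		if set {
			deleted++
		}
	}
	delete(s.cache, root)
	s.nBlobs -= deleted
	return deleted
}

func main() {
	c := &summaryCache{cache: make(map[[32]byte][]bool)}
	root := [32]byte{1}
	c.ensure(root, 0, 6)
	c.ensure(root, 2, 6)
	fmt.Println(c.evict(root)) // 2
}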

View File

@@ -53,7 +53,7 @@ func TestSlotByRoot_Summary(t *testing.T) {
 	for _, c := range cases {
 		if c.expected != nil {
 			key := bytesutil.ToBytes32([]byte(c.name))
-			sc.cache[key] = BlobStorageSummary{slot: 0, mask: c.expected}
+			sc.cache[key] = BlobStorageSummary{epoch: 0, mask: c.expected}
 		}
 	}
 	for _, c := range cases {

View File

@@ -0,0 +1,238 @@
package filesystem
import (
"fmt"
"io"
"path"
"path/filepath"
"strconv"
"strings"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/encoding/bytesutil"
"github.com/sirupsen/logrus"
"github.com/spf13/afero"
)
var errIdentFailure = errors.New("failed to determine blob metadata, ignoring all sub-paths.")
type identificationError struct {
err error
path string
ident blobIdent
}
func (ide *identificationError) Error() string {
return fmt.Sprintf("%s path=%s, err=%s", errIdentFailure.Error(), ide.path, ide.err.Error())
}
func (ide *identificationError) Unwrap() error {
return ide.err
}
func (*identificationError) Is(err error) bool {
return err == errIdentFailure
}
func (ide *identificationError) LogFields() logrus.Fields {
fields := ide.ident.logFields()
fields["path"] = ide.path
return fields
}
func newIdentificationError(path string, ident blobIdent, err error) *identificationError {
return &identificationError{path: path, ident: ident, err: err}
}
func listDir(fs afero.Fs, dir string) ([]string, error) {
top, err := fs.Open(dir)
if err != nil {
return nil, errors.Wrap(err, "failed to open directory descriptor")
}
defer func() {
if err := top.Close(); err != nil {
log.WithError(err).Errorf("Could not close file %s", dir)
}
}()
// re the -1 param: "If n <= 0, Readdirnames returns all the names from the directory in a single slice"
dirs, err := top.Readdirnames(-1)
if err != nil {
return nil, errors.Wrap(err, "failed to read directory listing")
}
return dirs, nil
}
// identPopulator is a function that sets values in the blobIdent for a given layer of the filesystem layout.
type identPopulator func(blobIdent, string) (blobIdent, error)
// layoutLayer represents a layer of the nested directory scheme. Each layer is defined by a filter function that
// ensures any entries at that layer of the scheme are named in a valid way, and a populateIdent function that
// parses the directory name into a blobIdent object, used for iterating across the layout in a layout-independent way.
type layoutLayer struct {
populateIdent identPopulator
filter func(string) bool
}
// identIterator moves through the filesystem in order to yield blobIdents.
// layoutLayers (in the 'layers' field) allows a filesystem layout to control how the
// layout is traversed. A layoutLayer can filter out entries from the directory listing
// via the filter function, and populate fields in the blobIdent via the populateIdent function.
// The blobIdent is populated from an empty value at the root, accumulating values for its fields at each layer.
// The fully populated blobIdent is returned when the iterator reaches the leaf layer.
type identIterator struct {
fs afero.Fs
path string
child *identIterator
ident blobIdent
// layoutLayers are the heart of how the layout defines the nesting of the components of the path.
// Each layer of the layout represents a different layer of the directory layout hierarchy,
// from the relative root at the zero index to the blob files at the end.
layers []layoutLayer
entries []string
offset int
eof bool
}
// atEOF can be used to peek at the iterator to see if it's already finished. This is useful for the migration code to check
// if there are any entries in the directory indicated by the migration.
func (iter *identIterator) atEOF() bool {
return iter.eof
}
// next is the only method that a user of the identIterator needs to call.
// identIterator will yield blobIdents in a breadth-first fashion,
// returning an empty blobIdent and io.EOF once all branches have been traversed.
func (iter *identIterator) next() (blobIdent, error) {
if iter.eof {
return blobIdent{}, io.EOF
}
if iter.child != nil {
next, err := iter.child.next()
if err == nil {
return next, nil
}
if !errors.Is(err, io.EOF) {
return blobIdent{}, err
}
}
return iter.advanceChild()
}
// advanceChild is used to move to the next directory at each layer of the tree, either when
// the nodes are first being initialized at a layer, or when a sub-branch has been exhausted.
func (iter *identIterator) advanceChild() (blobIdent, error) {
defer func() {
iter.offset += 1
}()
for i := iter.offset; i < len(iter.entries); i++ {
iter.offset = i
nextPath := filepath.Join(iter.path, iter.entries[iter.offset])
nextLayer := iter.layers[0]
if !nextLayer.filter(nextPath) {
continue
}
ident, err := nextLayer.populateIdent(iter.ident, nextPath)
if err != nil {
return ident, newIdentificationError(nextPath, ident, err)
}
// If we're at the leaf layer, we can return the updated ident.
if len(iter.layers) == 1 {
return ident, nil
}
entries, err := listDir(iter.fs, nextPath)
if err != nil {
return blobIdent{}, err
}
if len(entries) == 0 {
continue
}
iter.child = &identIterator{
fs: iter.fs,
path: nextPath,
ident: ident,
layers: iter.layers[1:],
entries: entries,
}
return iter.child.next()
}
return blobIdent{}, io.EOF
}
func populateNoop(namer blobIdent, _ string) (blobIdent, error) {
return namer, nil
}
func populateRoot(namer blobIdent, dir string) (blobIdent, error) {
root, err := rootFromPath(dir)
if err != nil {
return namer, err
}
namer.root = root
return namer, nil
}
func populateIndex(namer blobIdent, fname string) (blobIdent, error) {
idx, err := idxFromPath(fname)
if err != nil {
return namer, err
}
namer.index = idx
return namer, nil
}
func rootFromPath(p string) ([32]byte, error) {
subdir := filepath.Base(p)
root, err := stringToRoot(subdir)
if err != nil {
return root, errors.Wrapf(err, "invalid directory, could not parse subdir as root %s", p)
}
return root, nil
}
func idxFromPath(p string) (uint64, error) {
p = path.Base(p)
if !isSszFile(p) {
return 0, errors.Wrap(errNotBlobSSZ, "does not have .ssz extension")
}
parts := strings.Split(p, ".")
if len(parts) != 2 {
return 0, errors.Wrap(errNotBlobSSZ, "unexpected filename structure (want <index>.ssz)")
}
idx, err := strconv.ParseUint(parts[0], 10, 64)
if err != nil {
return 0, err
}
return idx, nil
}
func filterNoop(_ string) bool {
return true
}
func isRootDir(p string) bool {
dir := filepath.Base(p)
return len(dir) == rootStringLen && strings.HasPrefix(dir, "0x")
}
func isSszFile(s string) bool {
return filepath.Ext(s) == "."+sszExt
}
func rootToString(root [32]byte) string {
return fmt.Sprintf("%#x", root)
}
func stringToRoot(str string) ([32]byte, error) {
if len(str) != rootStringLen {
return [32]byte{}, errors.Wrapf(errInvalidRootString, "incorrect len for input=%s", str)
}
slice, err := hexutil.Decode(str)
if err != nil {
return [32]byte{}, errors.Wrapf(errInvalidRootString, "input=%s", str)
}
return bytesutil.ToBytes32(slice), nil
}
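The layoutLayer/identIterator machinery above is easiest to see with a concrete composition. The actual wiring lives in layout_flat.go and layout_by_epoch.go, which are not shown in this excerpt; a hypothetical two-layer flat layout (<root>/<index>.ssz), using stand-in types, might look like:

package main

import (
	"fmt"
	"path/filepath"
	"strconv"
	"strings"
)

// ident is a cut-down blobIdent whose fields accumulate layer by layer
// as the iterator descends the directory tree.
type ident struct {
	root  string
	index uint64
}

// layer mirrors layoutLayer: filter screens directory entries, and populate
// parses the entry name into the accumulating ident.
type layer struct {
	filter   func(string) bool
	populate func(ident, string) (ident, error)
}

// A hypothetical flat layout expressed as two layers: the first matches
// 0x-prefixed root directories, the second matches .ssz leaf files.
var flatLayers = []layer{
	{
		filter: func(p string) bool { return strings.HasPrefix(filepath.Base(p), "0x") },
		populate: func(id ident, p string) (ident, error) {
			id.root = filepath.Base(p)
			return id, nil
		},
	},
	{
		filter: func(p string) bool { return filepath.Ext(p) == ".ssz" },
		populate: func(id ident, p string) (ident, error) {
			idx, err := strconv.ParseUint(strings.TrimSuffix(filepath.Base(p), ".ssz"), 10, 64)
			id.index = idx
			return id, err
		},
	},
}

func main() {
	// Walk one root/leaf pair through both layers, as the iterator would.
	id, _ := flatLayers[0].populate(ident{}, "0xffff00")
	id, _ = flatLayers[1].populate(id, "0xffff00/3.ssz")
	fmt.Printf("%+v\n", id) // {root:0xffff00 index:3}
}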

View File

@@ -0,0 +1,304 @@
package filesystem
import (
"bytes"
"fmt"
"io"
"math"
"os"
"path"
"sort"
"testing"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/testing/require"
"github.com/prysmaticlabs/prysm/v5/testing/util"
"github.com/spf13/afero"
)
func TestRootFromDir(t *testing.T) {
cases := []struct {
name string
dir string
err error
root [32]byte
}{
{
name: "happy path",
dir: "0xffff875e1d985c5ccb214894983f2428edb271f0f87b68ba7010e4a99df3b5cb",
root: [32]byte{255, 255, 135, 94, 29, 152, 92, 92, 203, 33, 72, 148, 152, 63, 36, 40,
237, 178, 113, 240, 248, 123, 104, 186, 112, 16, 228, 169, 157, 243, 181, 203},
},
{
name: "too short",
dir: "0xffff875e1d985c5ccb214894983f2428edb271f0f87b68ba7010e4a99df3b5c",
err: errInvalidRootString,
},
{
name: "too log",
dir: "0xffff875e1d985c5ccb214894983f2428edb271f0f87b68ba7010e4a99df3b5cbb",
err: errInvalidRootString,
},
{
name: "missing prefix",
dir: "ffff875e1d985c5ccb214894983f2428edb271f0f87b68ba7010e4a99df3b5cb",
err: errInvalidRootString,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
root, err := stringToRoot(c.dir)
if c.err != nil {
require.ErrorIs(t, err, c.err)
return
}
require.NoError(t, err)
require.Equal(t, c.root, root)
})
}
}
func TestSlotFromFile(t *testing.T) {
cases := []struct {
slot primitives.Slot
}{
{slot: 0},
{slot: 2},
{slot: 1123581321},
{slot: math.MaxUint64},
}
for _, c := range cases {
t.Run(fmt.Sprintf("slot %d", c.slot), func(t *testing.T) {
fs, bs := NewEphemeralBlobStorageAndFs(t)
_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, c.slot, 1)
sc := verification.FakeVerifyForTest(t, sidecars[0])
require.NoError(t, bs.Save(sc))
namer := identForSidecar(sc)
sszPath := bs.layout.sszPath(namer)
slot, err := slotFromFile(sszPath, fs)
require.NoError(t, err)
require.Equal(t, c.slot, slot)
})
}
}
type dirFiles struct {
name string
isDir bool
children []dirFiles
}
func (df dirFiles) reify(t *testing.T, fs afero.Fs, base string) {
fullPath := path.Join(base, df.name)
if df.isDir {
if df.name != "" {
require.NoError(t, fs.Mkdir(fullPath, directoryPermissions()))
}
for _, c := range df.children {
c.reify(t, fs, fullPath)
}
} else {
fp, err := fs.Create(fullPath)
require.NoError(t, err)
_, err = fp.WriteString("derp")
require.NoError(t, err)
}
}
func (df dirFiles) childNames() []string {
cn := make([]string, len(df.children))
for i := range df.children {
cn[i] = df.children[i].name
}
return cn
}
func TestListDir(t *testing.T) {
fs := afero.NewMemMapFs()
rootStrs := []string{
"0x0023dc5d063c7c1b37016bb54963c6ff4bfe5dfdf6dac29e7ceeb2b8fa81ed7a",
"0xff30526cd634a5af3a09cc9bff67f33a621fc5b975750bb4432f74df077554b4",
"0x23f5f795aaeb78c01fadaf3d06da2e99bd4b3622ae4dfea61b05b7d9adb119c2",
}
// parent directory
tree := dirFiles{isDir: true}
// break out each subdir for easier assertions
notABlob := dirFiles{name: "notABlob", isDir: true}
childlessBlob := dirFiles{name: rootStrs[0], isDir: true}
blobWithSsz := dirFiles{name: rootStrs[1], isDir: true,
children: []dirFiles{{name: "1.ssz"}, {name: "2.ssz"}},
}
blobWithSszAndTmp := dirFiles{name: rootStrs[2], isDir: true,
children: []dirFiles{{name: "5.ssz"}, {name: "0.part"}}}
tree.children = append(tree.children,
notABlob, childlessBlob, blobWithSsz, blobWithSszAndTmp)
topChildren := make([]string, len(tree.children))
for i := range tree.children {
topChildren[i] = tree.children[i].name
}
var filter = func(entries []string, filt func(string) bool) []string {
filtered := make([]string, 0, len(entries))
for i := range entries {
if filt(entries[i]) {
filtered = append(filtered, entries[i])
}
}
return filtered
}
tree.reify(t, fs, "")
cases := []struct {
name string
dirPath string
expected []string
filter func(string) bool
err error
}{
{
name: "non-existent",
dirPath: "derp",
expected: []string{},
err: os.ErrNotExist,
},
{
name: "empty",
dirPath: childlessBlob.name,
expected: []string{},
},
{
name: "top",
dirPath: ".",
expected: topChildren,
},
{
name: "custom filter: only notABlob",
dirPath: ".",
expected: []string{notABlob.name},
filter: func(s string) bool {
return s == notABlob.name
},
},
{
name: "root filter",
dirPath: ".",
expected: []string{childlessBlob.name, blobWithSsz.name, blobWithSszAndTmp.name},
filter: isRootDir,
},
{
name: "ssz filter",
dirPath: blobWithSsz.name,
expected: blobWithSsz.childNames(),
filter: isSszFile,
},
{
name: "ssz mixed filter",
dirPath: blobWithSszAndTmp.name,
expected: []string{"5.ssz"},
filter: isSszFile,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
result, err := listDir(fs, c.dirPath)
if c.filter != nil {
result = filter(result, c.filter)
}
if c.err != nil {
require.ErrorIs(t, err, c.err)
require.Equal(t, 0, len(result))
} else {
require.NoError(t, err)
sort.Strings(c.expected)
sort.Strings(result)
require.DeepEqual(t, c.expected, result)
}
})
}
}
func TestSlotFromBlob(t *testing.T) {
cases := []struct {
slot primitives.Slot
}{
{slot: 0},
{slot: 2},
{slot: 1123581321},
{slot: math.MaxUint64},
}
for _, c := range cases {
t.Run(fmt.Sprintf("slot %d", c.slot), func(t *testing.T) {
_, sidecars := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, c.slot, 1)
sc := sidecars[0]
enc, err := sc.MarshalSSZ()
require.NoError(t, err)
slot, err := slotFromBlob(bytes.NewReader(enc))
require.NoError(t, err)
require.Equal(t, c.slot, slot)
})
}
}
func TestIterationComplete(t *testing.T) {
targets := []migrationTestTarget{
{
ident: ezIdent(t, "0x0125e54c64c925018c9296965a5b622d9f5ab626c10917860dcfb6aa09a0a00b", 1234, 0),
path: "by-epoch/0/1234/0x0125e54c64c925018c9296965a5b622d9f5ab626c10917860dcfb6aa09a0a00b/0.ssz",
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 0),
slotOffset: 31,
path: "by-epoch/1/5330/0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86/0.ssz",
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 1),
slotOffset: 31,
path: "by-epoch/1/5330/0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86/1.ssz",
},
{
ident: ezIdent(t, "0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c", 16777216, 0),
slotOffset: 16,
path: "by-epoch/4096/16777216/0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c/0.ssz",
},
{
ident: ezIdent(t, "0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c", 16777216, 1),
slotOffset: 16,
path: "by-epoch/4096/16777216/0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c/1.ssz",
},
{
ident: ezIdent(t, "0x42eabe3d2c125410cd226de6f2825fb7575ab896c3f52e43de1fa29e4c809aba", 16777217, 0),
slotOffset: 16,
path: "by-epoch/4096/16777217/0x42eabe3d2c125410cd226de6f2825fb7575ab896c3f52e43de1fa29e4c809aba/0.ssz",
},
{
ident: ezIdent(t, "0x666cea5034e22bd3b849cb33914cad59afd88ee08e4d5bc0e997411c945fbc1d", 11235, 1),
path: "by-epoch/2/11235/0x666cea5034e22bd3b849cb33914cad59afd88ee08e4d5bc0e997411c945fbc1d/1.ssz",
},
}
fs := afero.NewMemMapFs()
cache := newBlobStorageCache()
byEpoch, err := newLayout(LayoutNameByEpoch, fs, cache, nil)
require.NoError(t, err)
for _, tar := range targets {
setupTestBlobFile(t, tar.ident, tar.slotOffset, fs, byEpoch)
}
iter, err := byEpoch.iterateIdents(0)
require.NoError(t, err)
nIdents := 0
for ident, err := iter.next(); err != io.EOF; ident, err = iter.next() {
require.NoError(t, err)
nIdents++
require.NoError(t, cache.ensure(ident))
}
require.Equal(t, len(targets), nIdents)
for _, tar := range targets {
entry, ok := cache.get(tar.ident.root)
require.Equal(t, true, ok)
require.Equal(t, tar.ident.epoch, entry.epoch)
require.Equal(t, true, entry.HasIndex(tar.ident.index))
require.Equal(t, tar.path, byEpoch.sszPath(tar.ident))
}
}

View File

@@ -0,0 +1,291 @@
package filesystem
import (
"fmt"
"io"
"path/filepath"
"strings"
"time"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/sirupsen/logrus"
"github.com/spf13/afero"
)
const (
// Full root in directory will be 66 chars, eg:
// >>> len('0x0002fb4db510b8618b04dc82d023793739c26346a8b02eb73482e24b0fec0555') == 66
rootStringLen = 66
sszExt = "ssz"
partExt = "part"
periodicEpochBaseDir = "by-epoch"
)
const (
LayoutNameFlat = "flat"
LayoutNameByEpoch = "by-epoch"
)
var LayoutNames = []string{LayoutNameFlat, LayoutNameByEpoch}
var (
errMigrationFailure = errors.New("unable to migrate blob directory between old and new layout")
errCacheWarmFailed = errors.New("failed to warm blob filesystem cache")
errPruneFailed = errors.New("failed to prune root")
errInvalidRootString = errors.New("Could not parse hex string as a [32]byte")
errInvalidDirectoryLayout = errors.New("Could not parse blob directory path")
errInvalidLayoutName = errors.New("unknown layout name")
errLayoutNotDetected = errors.New("given layout not observed in the blob filesystem tree")
)
type blobIdent struct {
root [32]byte
epoch primitives.Epoch
index uint64
}
func newBlobIdent(root [32]byte, epoch primitives.Epoch, index uint64) blobIdent {
return blobIdent{root: root, epoch: epoch, index: index}
}
func identForSidecar(sc blocks.VerifiedROBlob) blobIdent {
return newBlobIdent(sc.BlockRoot(), slots.ToEpoch(sc.Slot()), sc.Index)
}
func (n blobIdent) sszFname() string {
return fmt.Sprintf("%d.%s", n.index, sszExt)
}
func (n blobIdent) partFname(entropy string) string {
return fmt.Sprintf("%s-%d.%s", entropy, n.index, partExt)
}
func (n blobIdent) logFields() logrus.Fields {
return logrus.Fields{
"root": fmt.Sprintf("%#x", n.root),
"epoch": n.epoch,
"index": n.index,
}
}
type fsLayout interface {
name() string
dir(n blobIdent) string
sszPath(n blobIdent) string
partPath(n blobIdent, entropy string) string
iterateIdents(before primitives.Epoch) (*identIterator, error)
ident(root [32]byte, idx uint64) (blobIdent, error)
dirIdent(root [32]byte) (blobIdent, error)
summary(root [32]byte) BlobStorageSummary
notify(ident blobIdent) error
pruneBefore(before primitives.Epoch) (*pruneSummary, error)
remove(ident blobIdent) (int, error)
blockParentDirs(ident blobIdent) []string
}
func newLayout(name string, fs afero.Fs, cache *blobStorageSummaryCache, pruner *blobPruner) (fsLayout, error) {
switch name {
case LayoutNameFlat:
return newFlatLayout(fs, cache, pruner), nil
case LayoutNameByEpoch:
return newPeriodicEpochLayout(fs, cache, pruner), nil
default:
return nil, errors.Wrapf(errInvalidLayoutName, "name=%s", name)
}
}
func warmCache(l fsLayout, cache *blobStorageSummaryCache) error {
iter, err := l.iterateIdents(0)
if err != nil {
return errors.Wrap(errCacheWarmFailed, err.Error())
}
for ident, err := iter.next(); !errors.Is(err, io.EOF); ident, err = iter.next() {
if errors.Is(err, errIdentFailure) {
idf := &identificationError{}
if errors.As(err, &idf) {
log.WithFields(idf.LogFields()).WithError(err).Error("Failed to cache blob data for path")
}
continue
}
if err != nil {
return fmt.Errorf("%w: failed to populate blob data cache: %w", errCacheWarmFailed, err)
}
if err := cache.ensure(ident); err != nil {
return fmt.Errorf("%w: failed to write cache entry for %s: %w", errCacheWarmFailed, l.sszPath(ident), err)
}
}
return nil
}
func migrateLayout(fs afero.Fs, from, to fsLayout, cache *blobStorageSummaryCache) error {
start := time.Now()
iter, err := from.iterateIdents(0)
if err != nil {
return errors.Wrapf(errMigrationFailure, "failed to iterate legacy structure while migrating blobs, err=%s", err.Error())
}
if iter.atEOF() {
return errLayoutNotDetected
}
log.WithField("fromLayout", from.name()).WithField("toLayout", to.name()).Info("Migrating blob filesystem layout. This one-time operation can take extra time (up to a few minutes for systems with extended blob storage and a cold disk cache).")
lastMoved := ""
parentDirs := make(map[string]bool) // this map should have < 65k keys by design
moved := 0
dc := newDirCleaner()
for ident, err := iter.next(); !errors.Is(err, io.EOF); ident, err = iter.next() {
if err != nil {
if errors.Is(err, errIdentFailure) {
idf := &identificationError{}
if errors.As(err, &idf) {
log.WithFields(idf.LogFields()).WithError(err).Error("Failed to migrate blob path")
}
continue
}
return errors.Wrapf(errMigrationFailure, "failed to iterate previous layout structure while migrating blobs, err=%s", err.Error())
}
src := from.dir(ident)
target := to.dir(ident)
if src != lastMoved {
targetParent := filepath.Dir(target)
if targetParent != "" && targetParent != "." && !parentDirs[targetParent] {
if err := fs.MkdirAll(targetParent, directoryPermissions()); err != nil {
return errors.Wrapf(errMigrationFailure, "failed to make enclosing path before moving %s to %s, err=%s", src, target, err.Error())
}
parentDirs[targetParent] = true
}
if err := fs.Rename(src, target); err != nil {
return errors.Wrapf(errMigrationFailure, "could not rename %s to %s, err=%s", src, target, err.Error())
}
moved += 1
lastMoved = src
for _, dir := range from.blockParentDirs(ident) {
dc.add(dir)
}
}
if err := cache.ensure(ident); err != nil {
return errors.Wrapf(errMigrationFailure, "could not cache path %s, err=%s", to.sszPath(ident), err.Error())
}
}
dc.clean(fs)
if moved > 0 {
log.WithField("dirsMoved", moved).WithField("elapsed", time.Since(start)).
Info("Blob filesystem migration complete.")
}
return nil
}
type dirCleaner struct {
maxDepth int
layers map[int]map[string]struct{}
}
func newDirCleaner() *dirCleaner {
return &dirCleaner{layers: make(map[int]map[string]struct{})}
}
func (d *dirCleaner) add(dir string) {
nLayers := len(strings.Split(dir, string(filepath.Separator)))
_, ok := d.layers[nLayers]
if !ok {
d.layers[nLayers] = make(map[string]struct{})
}
d.layers[nLayers][dir] = struct{}{}
if nLayers > d.maxDepth {
d.maxDepth = nLayers
}
}
func (d *dirCleaner) clean(fs afero.Fs) {
for i := d.maxDepth; i >= 0; i-- {
d.cleanLayer(fs, i)
}
}
func (d *dirCleaner) cleanLayer(fs afero.Fs, layer int) {
dirs, ok := d.layers[layer]
if !ok {
return
}
for dir := range dirs {
// Use Remove rather than RemoveAll to make sure we're only removing empty directories
if err := fs.Remove(dir); err != nil {
log.WithField("dir", dir).WithError(err).Error("Failed to remove blob directory, please remove it manually if desired.")
contents, err := listDir(fs, dir)
if err != nil {
log.WithField("dir", dir).WithError(err).Error("Could not list blob directory contents to find reason for removal failure.")
continue
}
for _, c := range contents {
log.WithField("file", c).WithField("dir", dir).Debug("Unexpected file blocking migrated blob directory cleanup.")
}
}
}
}
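// Illustration of the bottom-up cleanup order: if add is called with
// "by-epoch", "by-epoch/1", and "by-epoch/1/5330" (depths 1, 2, 3), clean removes
// the depth-3 directories first, so each parent is already empty (barring stray
// files) by the time its own Remove is attempted.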
type pruneSummary struct {
blobsPruned int
failedRemovals []string
}
func (s pruneSummary) LogFields() logrus.Fields {
return logrus.Fields{
"blobsPruned": s.blobsPruned,
"failedRemovals": len(s.failedRemovals),
}
}
func pruneBefore(before primitives.Epoch, l fsLayout) (map[primitives.Epoch]*pruneSummary, error) {
sums := make(map[primitives.Epoch]*pruneSummary)
iter, err := l.iterateIdents(before)
if err != nil {
return nil, errors.Wrap(err, "failed to iterate blob paths for pruning")
}
// We will get an ident for each index, but want to prune all indexes for the given root together.
var lastIdent blobIdent
for ident, err := iter.next(); !errors.Is(err, io.EOF); ident, err = iter.next() {
if err != nil {
if errors.Is(err, errIdentFailure) {
idf := &identificationError{}
if errors.As(err, &idf) {
log.WithFields(idf.LogFields()).WithError(err).Error("Failed to prune blob path due to identification errors")
}
continue
}
log.WithError(err).Error("encountered unhandled error during pruning")
return nil, errors.Wrap(errPruneFailed, err.Error())
}
if ident.epoch >= before {
continue
}
if lastIdent.root != ident.root {
pruneOne(lastIdent, l, sums)
lastIdent = ident
}
}
// handle the final ident
pruneOne(lastIdent, l, sums)
return sums, nil
}
func pruneOne(ident blobIdent, l fsLayout, sums map[primitives.Epoch]*pruneSummary) {
// A zero-value ident is passed before the first real ident has been seen (and when the set is empty); skip it.
if ident.root == params.BeaconConfig().ZeroHash {
return
}
_, ok := sums[ident.epoch]
if !ok {
sums[ident.epoch] = &pruneSummary{}
}
s := sums[ident.epoch]
removed, err := l.remove(ident)
if err != nil {
s.failedRemovals = append(s.failedRemovals, l.dir(ident))
log.WithField("root", fmt.Sprintf("%#x", ident.root)).WithError(err).Error("Failed to delete blob directory for root")
}
s.blobsPruned += removed
}

View File

@@ -0,0 +1,212 @@
package filesystem
import (
"fmt"
"os"
"path"
"path/filepath"
"strconv"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/spf13/afero"
)
const epochsPerDirectory = 4096
type periodicEpochLayout struct {
fs afero.Fs
cache *blobStorageSummaryCache
pruner *blobPruner
}
var _ fsLayout = &periodicEpochLayout{}
func newPeriodicEpochLayout(fs afero.Fs, cache *blobStorageSummaryCache, pruner *blobPruner) fsLayout {
l := &periodicEpochLayout{fs: fs, cache: cache, pruner: pruner}
return l
}
func (l *periodicEpochLayout) name() string {
return LayoutNameByEpoch
}
func (l *periodicEpochLayout) blockParentDirs(ident blobIdent) []string {
return []string{
periodicEpochBaseDir,
l.periodDir(ident.epoch),
l.epochDir(ident.epoch),
}
}
func (l *periodicEpochLayout) notify(ident blobIdent) error {
if err := l.cache.ensure(ident); err != nil {
return err
}
l.pruner.notify(ident.epoch, l)
return nil
}
// If before == 0, it won't be used as a filter and all idents will be returned.
func (l *periodicEpochLayout) iterateIdents(before primitives.Epoch) (*identIterator, error) {
_, err := l.fs.Stat(periodicEpochBaseDir)
if err != nil {
if os.IsNotExist(err) {
return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration.
}
return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir)
}
// iterate root, which should have directories named by "period"
entries, err := listDir(l.fs, periodicEpochBaseDir)
if err != nil {
return nil, errors.Wrapf(err, "failed to list %s", periodicEpochBaseDir)
}
return &identIterator{
fs: l.fs,
path: periodicEpochBaseDir,
// Please see comments on the `layers` field in `identIterator` if the role of the layers is unclear.
layers: []layoutLayer{
{populateIdent: populateNoop, filter: isBeforePeriod(before)},
{populateIdent: populateEpoch, filter: isBeforeEpoch(before)},
{populateIdent: populateRoot, filter: isRootDir}, // extract root from path
{populateIdent: populateIndex, filter: isSszFile}, // extract index from filename
},
entries: entries,
}, nil
}
func (l *periodicEpochLayout) ident(root [32]byte, idx uint64) (blobIdent, error) {
return l.cache.identForIdx(root, idx)
}
func (l *periodicEpochLayout) dirIdent(root [32]byte) (blobIdent, error) {
return l.cache.identForRoot(root)
}
func (l *periodicEpochLayout) summary(root [32]byte) BlobStorageSummary {
return l.cache.Summary(root)
}
func (l *periodicEpochLayout) dir(n blobIdent) string {
return filepath.Join(l.epochDir(n.epoch), rootToString(n.root))
}
func (l *periodicEpochLayout) epochDir(epoch primitives.Epoch) string {
return filepath.Join(l.periodDir(epoch), fmt.Sprintf("%d", epoch))
}
func (l *periodicEpochLayout) periodDir(epoch primitives.Epoch) string {
return filepath.Join(periodicEpochBaseDir, fmt.Sprintf("%d", periodForEpoch(epoch)))
}
func (l *periodicEpochLayout) sszPath(n blobIdent) string {
return filepath.Join(l.dir(n), n.sszFname())
}
func (l *periodicEpochLayout) partPath(n blobIdent, entropy string) string {
return filepath.Join(l.dir(n), n.partFname(entropy))
}
func (l *periodicEpochLayout) pruneBefore(before primitives.Epoch) (*pruneSummary, error) {
sums, err := pruneBefore(before, l)
if err != nil {
return nil, err
}
// Roll up summaries and clean up per-epoch directories.
rollup := &pruneSummary{}
for epoch, sum := range sums {
rollup.blobsPruned += sum.blobsPruned
rollup.failedRemovals = append(rollup.failedRemovals, sum.failedRemovals...)
rmdir := l.epochDir(epoch)
if len(sum.failedRemovals) == 0 {
if err := l.fs.Remove(rmdir); err != nil {
log.WithField("dir", rmdir).WithError(err).Error("Failed to remove epoch directory while pruning")
}
} else {
log.WithField("dir", rmdir).WithField("numFailed", len(sum.failedRemovals)).WithError(err).Error("Unable to remove epoch directory due to pruning failures")
}
}
return rollup, nil
}
func (l *periodicEpochLayout) remove(ident blobIdent) (int, error) {
removed := l.cache.evict(ident.root)
// Skip the syscall if there are no blobs to remove.
if removed == 0 {
return 0, nil
}
if err := l.fs.RemoveAll(l.dir(ident)); err != nil {
return removed, err
}
return removed, nil
}
func periodForEpoch(epoch primitives.Epoch) primitives.Epoch {
return epoch / params.BeaconConfig().MinEpochsForBlobsSidecarsRequest
}
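// Worked example, assuming the mainnet value MinEpochsForBlobsSidecarsRequest = 4096:
//
//	periodForEpoch(5330)     == 1    // epoch 5330 lives under by-epoch/1/5330/
//	periodForEpoch(16777216) == 4096 // by-epoch/4096/16777216/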
// Funcs below this line are iteration support methods that are specific to the epoch layout.
func isBeforePeriod(before primitives.Epoch) func(string) bool {
if before == 0 {
return filterNoop
}
beforePeriod := periodForEpoch(before)
if periodForEpoch(before) == periodForEpoch(before-1) {
// Add one to include the period containing 'before', unless 'before' is the first epoch
// in its period, in which case every earlier period is strictly older.
beforePeriod += 1
}
return func(p string) bool {
period, err := periodFromPath(p)
if err != nil {
return false
}
return primitives.Epoch(period) < beforePeriod
}
}
func isBeforeEpoch(before primitives.Epoch) func(string) bool {
if before == 0 {
return filterNoop
}
return func(p string) bool {
epoch, err := epochFromPath(p)
if err != nil {
return false
}
return epoch < before
}
}
func epochFromPath(p string) (primitives.Epoch, error) {
subdir := filepath.Base(p)
epoch, err := strconv.ParseUint(subdir, 10, 64)
if err != nil {
return 0, errors.Wrapf(errInvalidDirectoryLayout,
"failed to decode epoch as uint, err=%s, dir=%s", err.Error(), p)
}
return primitives.Epoch(epoch), nil
}
func periodFromPath(p string) (uint64, error) {
subdir := filepath.Base(p)
period, err := strconv.ParseUint(subdir, 10, 64)
if err != nil {
return 0, errors.Wrapf(errInvalidDirectoryLayout,
"failed to decode period from path as uint, err=%s, dir=%s", err.Error(), p)
}
return period, nil
}
func populateEpoch(ident blobIdent, dir string) (blobIdent, error) {
epoch, err := epochFromPath(dir)
if err != nil {
return ident, err
}
ident.epoch = epoch
return ident, nil
}

View File

@@ -0,0 +1,219 @@
package filesystem
import (
"encoding/binary"
"io"
"os"
"path"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/spf13/afero"
)
type flatLayout struct {
fs afero.Fs
cache *blobStorageSummaryCache
pruner *blobPruner
}
var _ fsLayout = &flatLayout{}
func newFlatLayout(fs afero.Fs, cache *blobStorageSummaryCache, pruner *blobPruner) fsLayout {
l := &flatLayout{fs: fs, cache: cache, pruner: pruner}
return l
}
func (l *flatLayout) iterateIdents(before primitives.Epoch) (*identIterator, error) {
if _, err := l.fs.Stat("."); err != nil {
if os.IsNotExist(err) {
return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration.
}
return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir)
}
entries, err := listDir(l.fs, ".")
if err != nil {
return nil, errors.Wrapf(err, "could not list root directory")
}
slotAndIndex := &flatSlotReader{fs: l.fs, cache: l.cache, before: before}
return &identIterator{
fs: l.fs,
// Please see comments on the `layers` field in `identIterator` if the role of the layers is unclear.
layers: []layoutLayer{
{populateIdent: populateRoot, filter: isFlatCachedAndBefore(l.cache, before)},
{populateIdent: slotAndIndex.populateEpoch, filter: slotAndIndex.isSSZAndBefore}},
entries: entries,
}, nil
}
func (*flatLayout) name() string {
return LayoutNameFlat
}
func (*flatLayout) blockParentDirs(_ blobIdent) []string {
return []string{}
}
func (*flatLayout) dir(n blobIdent) string {
return rootToString(n.root)
}
func (l *flatLayout) sszPath(n blobIdent) string {
return path.Join(l.dir(n), n.sszFname())
}
func (l *flatLayout) partPath(n blobIdent, entropy string) string {
return path.Join(l.dir(n), n.partFname(entropy))
}
func (l *flatLayout) ident(root [32]byte, idx uint64) (blobIdent, error) {
return l.cache.identForIdx(root, idx)
}
func (l *flatLayout) dirIdent(root [32]byte) (blobIdent, error) {
return l.cache.identForRoot(root)
}
func (l *flatLayout) summary(root [32]byte) BlobStorageSummary {
return l.cache.Summary(root)
}
func (l *flatLayout) remove(ident blobIdent) (int, error) {
removed := l.cache.evict(ident.root)
if err := l.fs.RemoveAll(l.dir(ident)); err != nil {
return removed, err
}
return removed, nil
}
func (l *flatLayout) notify(ident blobIdent) error {
if err := l.cache.ensure(ident); err != nil {
return err
}
l.pruner.notify(ident.epoch, l)
return nil
}
func (l *flatLayout) pruneBefore(before primitives.Epoch) (*pruneSummary, error) {
sums, err := pruneBefore(before, l)
if err != nil {
return nil, err
}
// Roll up summaries and clean up per-epoch directories.
rollup := &pruneSummary{}
for _, sum := range sums {
rollup.blobsPruned += sum.blobsPruned
rollup.failedRemovals = append(rollup.failedRemovals, sum.failedRemovals...)
}
return rollup, nil
}
// Below this line are iteration support funcs and types that are specific to the flat layout.
// Read slot from marshaled BlobSidecar data in the given file. See slotFromBlob for details.
func slotFromFile(name string, fs afero.Fs) (primitives.Slot, error) {
f, err := fs.Open(name)
if err != nil {
return 0, err
}
defer func() {
if err := f.Close(); err != nil {
log.WithError(err).Error("Could not close blob file")
}
}()
return slotFromBlob(f)
}
// slotFromBlob reads the slot from marshaled BlobSidecar data at a fixed offset:
// 8 (index) + 131072 (blob) + 48 (kzg_commitment) + 48 (kzg_proof) = 131176 bytes,
// i.e. the size of the fields preceding the slot within the SignedBeaconBlockHeader.
func slotFromBlob(at io.ReaderAt) (primitives.Slot, error) {
b := make([]byte, 8)
_, err := at.ReadAt(b, 131176)
if err != nil {
return 0, err
}
rawSlot := binary.LittleEndian.Uint64(b)
return primitives.Slot(rawSlot), nil
}
type flatSlotReader struct {
before primitives.Epoch
fs afero.Fs
cache *blobStorageSummaryCache
}
func (l *flatSlotReader) populateEpoch(ident blobIdent, fname string) (blobIdent, error) {
ident, err := populateIndex(ident, fname)
if err != nil {
return ident, err
}
sum, ok := l.cache.get(ident.root)
if ok {
ident.epoch = sum.epoch
// Return early if the index is already known to the cache.
if sum.HasIndex(ident.index) {
return ident, nil
}
} else {
// If the root is not in the cache, we need to read the slot from the file.
slot, err := slotFromFile(fname, l.fs)
if err != nil {
return ident, err
}
ident.epoch = slots.ToEpoch(slot)
}
return ident, l.cache.ensure(ident)
}
func (l *flatSlotReader) isSSZAndBefore(fname string) bool {
if !isSszFile(fname) {
return false
}
// When 'before' != 0 and isSSZAndBefore is used as the filter on the same layer as populateEpoch,
// this method typically calls populateEpoch before the iteration code does. The cache is still
// guaranteed to be populated either way: a path filtered out here is either malformed (its root
// cannot be determined, so iteration would skip it regardless) or valid, in which case the cache
// is populated here before the epoch comparison happens.
if l.before == 0 {
return true
}
ident, err := populateRoot(blobIdent{}, path.Dir(fname))
// Filter out the path if we can't determine its root.
if err != nil {
return false
}
ident, err = l.populateEpoch(ident, fname)
// Filter out the path if we can't determine its epoch or properly cache it.
if err != nil {
return false
}
return ident.epoch < l.before
}
// isFlatCachedAndBefore returns a filter callback function to exclude roots that are known to be after the given epoch
// based on the cache. It's an opportunistic filter; if the cache is not populated, it will not attempt to populate it.
// isSSZAndBefore on the other hand, is a strict filter that will only return true if the file is an SSZ file and
// the epoch can be determined.
func isFlatCachedAndBefore(cache *blobStorageSummaryCache, before primitives.Epoch) func(string) bool {
if before == 0 {
return isRootDir
}
return func(p string) bool {
if !isRootDir(p) {
return false
}
root, err := rootFromPath(p)
if err != nil {
return false
}
sum, ok := cache.get(root)
// If we don't know the epoch by looking at the root, don't try to filter it.
if !ok {
return true
}
return sum.epoch < before
}
}

View File

@@ -0,0 +1,75 @@
package filesystem
import (
"testing"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/testing/require"
)
type mockLayout struct {
pruneBeforeFunc func(primitives.Epoch) (*pruneSummary, error)
}
var _ fsLayout = &mockLayout{}
func (m *mockLayout) name() string {
return "mock"
}
func (*mockLayout) dir(_ blobIdent) string {
return ""
}
func (*mockLayout) blockParentDirs(id blobIdent) []string {
return []string{}
}
func (*mockLayout) sszPath(_ blobIdent) string {
return ""
}
func (*mockLayout) partPath(_ blobIdent, _ string) string {
return ""
}
func (*mockLayout) iterateIdents(_ primitives.Epoch) (*identIterator, error) {
return nil, nil
}
func (*mockLayout) ident(_ [32]byte, _ uint64) (blobIdent, error) {
return blobIdent{}, nil
}
func (*mockLayout) dirIdent(_ [32]byte) (blobIdent, error) {
return blobIdent{}, nil
}
func (*mockLayout) summary(_ [32]byte) BlobStorageSummary {
return BlobStorageSummary{}
}
func (*mockLayout) notify(blobIdent) error {
return nil
}
func (m *mockLayout) pruneBefore(before primitives.Epoch) (*pruneSummary, error) {
return m.pruneBeforeFunc(before)
}
func (*mockLayout) remove(ident blobIdent) (int, error) {
return 0, nil
}
func TestCleaner(t *testing.T) {
l := &periodicEpochLayout{}
p := l.periodDir(11235813)
e := l.epochDir(11235813)
dc := newDirCleaner()
dc.add(p)
require.Equal(t, 2, dc.maxDepth)
dc.add(e)
require.Equal(t, 3, dc.maxDepth)
}

View File

@@ -0,0 +1,180 @@
package filesystem
import (
"os"
"testing"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/testing/require"
"github.com/prysmaticlabs/prysm/v5/testing/util"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/spf13/afero"
)
func ezIdent(t *testing.T, rootStr string, epoch primitives.Epoch, index uint64) blobIdent {
r, err := stringToRoot(rootStr)
require.NoError(t, err)
return blobIdent{root: r, epoch: epoch, index: index}
}
func setupTestBlobFile(t *testing.T, ident blobIdent, offset primitives.Slot, fs afero.Fs, l fsLayout) {
slot, err := slots.EpochStart(ident.epoch)
require.NoError(t, err)
slot += offset
_, sc := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, slot, 1)
scb, err := sc[0].MarshalSSZ()
require.NoError(t, err)
dir := l.dir(ident)
require.NoError(t, fs.MkdirAll(dir, directoryPermissions()))
p := l.sszPath(ident)
require.NoError(t, afero.WriteFile(fs, p, scb, 0666))
_, err = fs.Stat(p)
require.NoError(t, err)
}
type migrationTestTarget struct {
ident blobIdent
slotOffset primitives.Slot
migrated bool
path string
}
func testAssertFsMigrated(t *testing.T, fs afero.Fs, ident blobIdent, before, after fsLayout) {
// Assert the pre-migration path is gone.
_, err := fs.Stat(before.sszPath(ident))
require.ErrorIs(t, err, os.ErrNotExist)
dir := before.dir(ident)
_, err = listDir(fs, dir)
require.ErrorIs(t, err, os.ErrNotExist)
// Assert the post-migration path present.
_, err = fs.Stat(after.sszPath(ident))
require.NoError(t, err)
}
func TestMigrations(t *testing.T) {
cases := []struct {
name string
forwardLayout string
backwardLayout string
targets []migrationTestTarget
}{
{
name: "all need migration",
backwardLayout: LayoutNameFlat,
forwardLayout: LayoutNameByEpoch,
targets: []migrationTestTarget{
{
ident: ezIdent(t, "0x0125e54c64c925018c9296965a5b622d9f5ab626c10917860dcfb6aa09a0a00b", 1234, 0),
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 0),
slotOffset: 31,
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 1),
slotOffset: 31,
},
{
ident: ezIdent(t, "0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c", 16777216, 0),
slotOffset: 16,
},
},
},
{
name: "mix old and new",
backwardLayout: LayoutNameFlat,
forwardLayout: LayoutNameByEpoch,
targets: []migrationTestTarget{
{
ident: ezIdent(t, "0x0125e54c64c925018c9296965a5b622d9f5ab626c10917860dcfb6aa09a0a00b", 1234, 0),
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 0),
slotOffset: 31,
},
{
ident: ezIdent(t, "0x0127dba6fd30fdbb47e73e861d5c6e602b38ac3ddc945bb6a2fc4e10761e9a86", 5330, 1),
slotOffset: 31,
},
{
ident: ezIdent(t, "0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c", 16777216, 0),
slotOffset: 16,
migrated: true,
},
{
ident: ezIdent(t, "0x0232521756a0b965eab2c2245d7ad85feaeaf5f427cd14d1a7531f9d555b415c", 16777216, 1),
slotOffset: 16,
migrated: true,
},
{
ident: ezIdent(t, "0x42eabe3d2c125410cd226de6f2825fb7575ab896c3f52e43de1fa29e4c809aba", 16777217, 0),
slotOffset: 16,
migrated: true,
},
{
ident: ezIdent(t, "0x666cea5034e22bd3b849cb33914cad59afd88ee08e4d5bc0e997411c945fbc1d", 11235, 1),
migrated: true,
},
},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
t.Run("forward", func(t *testing.T) {
testMigration(t, c.forwardLayout, c.backwardLayout, c.targets)
})
// Run the same migration in reverse, to cover both directions while keeping the test table small.
t.Run("backward", func(t *testing.T) {
testMigration(t, c.backwardLayout, c.forwardLayout, c.targets)
})
})
}
}
func testMigration(t *testing.T, forwardName, backwardName string, targets []migrationTestTarget) {
fs := afero.NewMemMapFs()
cache := newBlobStorageCache()
forward, err := newLayout(forwardName, fs, cache, nil)
require.NoError(t, err)
backward, err := newLayout(backwardName, fs, cache, nil)
require.NoError(t, err)
for _, tar := range targets {
if tar.migrated {
setupTestBlobFile(t, tar.ident, tar.slotOffset, fs, forward)
} else {
setupTestBlobFile(t, tar.ident, tar.slotOffset, fs, backward)
}
}
require.NoError(t, migrateLayout(fs, backward, forward, cache))
for _, tar := range targets {
// Make sure the file wound up in the right spot, according to the forward layout
// and that the old file is gone, according to the backward layout.
testAssertFsMigrated(t, fs, tar.ident, backward, forward)
entry, ok := cache.get(tar.ident.root)
// We only expect the cache to be populated here by files that needed to be moved.
if !tar.migrated {
require.Equal(t, true, ok)
require.Equal(t, true, entry.HasIndex(tar.ident.index))
require.Equal(t, tar.ident.epoch, entry.epoch)
}
}
// Run migration in reverse - testing "undo"
cache = newBlobStorageCache()
forward, err = newLayout(forwardName, fs, cache, nil)
require.NoError(t, err)
backward, err = newLayout(backwardName, fs, cache, nil)
require.NoError(t, err)
// forward and backward are flipped compared to the above
require.NoError(t, migrateLayout(fs, forward, backward, cache))
for _, tar := range targets {
// just like the above, but forward and backward are flipped
testAssertFsMigrated(t, fs, tar.ident, forward, backward)
entry, ok := cache.get(tar.ident.root)
require.Equal(t, true, ok)
require.Equal(t, true, entry.HasIndex(tar.ident.index))
require.Equal(t, tar.ident.epoch, entry.epoch)
}
}

View File

@@ -4,30 +4,41 @@ import (
"testing" "testing"
"github.com/prysmaticlabs/prysm/v5/config/params" "github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/spf13/afero" "github.com/spf13/afero"
) )
// NewEphemeralBlobStorage should only be used for tests. // NewEphemeralBlobStorage should only be used for tests.
// The instance of BlobStorage returned is backed by an in-memory virtual filesystem, // The instance of BlobStorage returned is backed by an in-memory virtual filesystem,
// improving test performance and simplifying cleanup. // improving test performance and simplifying cleanup.
func NewEphemeralBlobStorage(t testing.TB) *BlobStorage { func NewEphemeralBlobStorage(t testing.TB, opts ...BlobStorageOption) *BlobStorage {
fs := afero.NewMemMapFs() return NewWarmedEphemeralBlobStorageUsingFs(t, afero.NewMemMapFs(), opts...)
pruner, err := newBlobPruner(fs, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest, withWarmedCache())
if err != nil {
t.Fatal("test setup issue", err)
}
return &BlobStorage{fs: fs, pruner: pruner}
} }
// NewEphemeralBlobStorageWithFs can be used by tests that want access to the virtual filesystem // NewEphemeralBlobStorageAndFs can be used by tests that want access to the virtual filesystem
// in order to interact with it outside the parameters of the BlobStorage api. // in order to interact with it outside the parameters of the BlobStorage api.
func NewEphemeralBlobStorageWithFs(t testing.TB) (afero.Fs, *BlobStorage) { func NewEphemeralBlobStorageAndFs(t testing.TB, opts ...BlobStorageOption) (afero.Fs, *BlobStorage) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
pruner, err := newBlobPruner(fs, params.BeaconConfig().MinEpochsForBlobsSidecarsRequest, withWarmedCache()) bs := NewWarmedEphemeralBlobStorageUsingFs(t, fs, opts...)
return fs, bs
}
func NewEphemeralBlobStorageUsingFs(t testing.TB, fs afero.Fs, opts ...BlobStorageOption) *BlobStorage {
opts = append(opts,
WithBlobRetentionEpochs(params.BeaconConfig().MinEpochsForBlobsSidecarsRequest),
WithFs(fs))
bs, err := NewBlobStorage(opts...)
if err != nil { if err != nil {
t.Fatal("test setup issue", err) t.Fatalf("error initializing test BlobStorage, err=%s", err.Error())
} }
return fs, &BlobStorage{fs: fs, pruner: pruner} return bs
}
func NewWarmedEphemeralBlobStorageUsingFs(t testing.TB, fs afero.Fs, opts ...BlobStorageOption) *BlobStorage {
bs := NewEphemeralBlobStorageUsingFs(t, fs, opts...)
bs.WarmCache()
return bs
} }
type BlobMocker struct { type BlobMocker struct {
@@ -37,17 +48,9 @@ type BlobMocker struct {
// CreateFakeIndices creates empty blob sidecar files at the expected path for the given // CreateFakeIndices creates empty blob sidecar files at the expected path for the given
// root and indices to influence the result of Indices(). // root and indices to influence the result of Indices().
func (bm *BlobMocker) CreateFakeIndices(root [32]byte, indices ...uint64) error { func (bm *BlobMocker) CreateFakeIndices(root [32]byte, slot primitives.Slot, indices ...uint64) error {
for i := range indices { for i := range indices {
n := blobNamer{root: root, index: indices[i]} if err := bm.bs.layout.notify(newBlobIdent(root, slots.ToEpoch(slot), indices[i])); err != nil {
if err := bm.fs.MkdirAll(n.dir(), directoryPermissions); err != nil {
return err
}
f, err := bm.fs.Create(n.path())
if err != nil {
return err
}
if err := f.Close(); err != nil {
return err return err
} }
} }
@@ -56,9 +59,8 @@ func (bm *BlobMocker) CreateFakeIndices(root [32]byte, indices ...uint64) error
// NewEphemeralBlobStorageWithMocker returns a *BlobMocker value in addition to the BlobStorage value. // NewEphemeralBlobStorageWithMocker returns a *BlobMocker value in addition to the BlobStorage value.
// BlockMocker encapsulates things blob path construction to avoid leaking implementation details. // BlockMocker encapsulates things blob path construction to avoid leaking implementation details.
func NewEphemeralBlobStorageWithMocker(_ testing.TB) (*BlobMocker, *BlobStorage) { func NewEphemeralBlobStorageWithMocker(t testing.TB) (*BlobMocker, *BlobStorage) {
fs := afero.NewMemMapFs() fs, bs := NewEphemeralBlobStorageAndFs(t)
bs := &BlobStorage{fs: fs}
return &BlobMocker{fs: fs, bs: bs}, bs return &BlobMocker{fs: fs, bs: bs}, bs
} }
@@ -66,7 +68,7 @@ func NewMockBlobStorageSummarizer(t *testing.T, set map[[32]byte][]int) BlobStor
c := newBlobStorageCache() c := newBlobStorageCache()
for k, v := range set { for k, v := range set {
for i := range v { for i := range v {
if err := c.ensure(k, 0, uint64(v[i])); err != nil { if err := c.ensure(blobIdent{root: k, epoch: 0, index: uint64(v[i])}); err != nil {
t.Fatal(err) t.Fatal(err)
} }
} }

View File

@@ -1,319 +1,67 @@
package filesystem
import (
"sync"
"sync/atomic"
"time"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/sirupsen/logrus"
)
const retentionBuffer primitives.Epoch = 2
var errNotBlobSSZ = errors.New("not a blob ssz file")
// blobPruner keeps track of the tail end of the retention period, based only on the blobs it has seen via the notify method.
// If the retention period advances in response to notify being called,
// the pruner will invoke the pruneBefore method of the given layout in a new goroutine.
// The details of pruning are left entirely to the layout; the pruner's only responsibility is to
// schedule just one pruning operation at a time, for each forward movement of the minimum retention epoch.
type blobPruner struct {
mu sync.Mutex
prunedBefore atomic.Uint64
retentionPeriod primitives.Epoch
}
func newBlobPruner(retain primitives.Epoch) *blobPruner {
return &blobPruner{retentionPeriod: retain + retentionBuffer}
}
// notify returns a channel that is closed when the pruning operation is complete.
// This is useful for tests, but at runtime fsLayouts or BlobStorage should not wait for completion.
func (p *blobPruner) notify(latest primitives.Epoch, layout fsLayout) chan struct{} {
done := make(chan struct{})
floor := periodFloor(latest, p.retentionPeriod)
if primitives.Epoch(p.prunedBefore.Swap(uint64(floor))) >= floor {
// Only trigger pruning when the atomic swap advances the previous value of prunedBefore.
close(done)
return done
}
go func() {
p.mu.Lock()
start := time.Now()
defer p.mu.Unlock()
sum, err := layout.pruneBefore(floor)
if err != nil {
log.WithError(err).Warn("Encountered errors during blob pruning.")
}
// Guard against a nil summary on the error path before logging counts.
if sum != nil {
log.WithFields(logrus.Fields{
"upToEpoch": floor,
"duration": time.Since(start).String(),
"filesRemoved": sum.blobsPruned,
}).Debug("Pruned old blobs")
blobsPrunedCounter.Add(float64(sum.blobsPruned))
}
close(done)
}()
return done
}
func periodFloor(latest, period primitives.Epoch) primitives.Epoch {
if latest < period {
return 0
}
return latest - period
}
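// Worked example of the notify trigger, assuming the mainnet retention of 4096
// epochs (so retentionPeriod = 4098 after adding retentionBuffer): at latest
// epoch 15333, periodFloor(15333, 4098) == 11235. Pruning is scheduled only if
// the previous prunedBefore value was below 11235; a second notify for the same
// epoch finds prunedBefore already at 11235 and returns an already-closed channel.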

View File

@@ -1,394 +1,197 @@
package filesystem
import (
"encoding/binary"
"os"
"testing"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
"github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
"github.com/prysmaticlabs/prysm/v5/testing/require"
"github.com/prysmaticlabs/prysm/v5/testing/util"
"github.com/prysmaticlabs/prysm/v5/time/slots"
"github.com/spf13/afero"
)
type prunerScenario struct {
name string
prunedBefore primitives.Epoch
retentionPeriod primitives.Epoch
latest primitives.Epoch
expected pruneExpectation
}
type pruneExpectation struct {
called bool
arg primitives.Epoch
summary *pruneSummary
err error
}
func (e *pruneExpectation) record(before primitives.Epoch) (*pruneSummary, error) {
e.called = true
e.arg = before
if e.summary == nil {
e.summary = &pruneSummary{}
}
return e.summary, e.err
}
func TestPrunerNotify(t *testing.T) {
defaultRetention := params.BeaconConfig().MinEpochsForBlobsSidecarsRequest
cases := []prunerScenario{
{
name: "last epoch of period",
retentionPeriod: defaultRetention,
prunedBefore: 11235,
latest: defaultRetention + 11235,
expected: pruneExpectation{called: false},
},
{
name: "within period",
retentionPeriod: defaultRetention,
prunedBefore: 11235,
latest: 11235 + defaultRetention - 1,
expected: pruneExpectation{called: false},
},
{
name: "triggers",
retentionPeriod: defaultRetention,
prunedBefore: 11235,
latest: 11235 + 1 + defaultRetention,
expected: pruneExpectation{called: true, arg: 11235 + 1},
},
{
name: "from zero - before first period",
retentionPeriod: defaultRetention,
prunedBefore: 0,
latest: defaultRetention - 1,
expected: pruneExpectation{called: false},
},
{
name: "from zero - at boundary",
retentionPeriod: defaultRetention,
prunedBefore: 0,
latest: defaultRetention,
expected: pruneExpectation{called: false},
},
{
name: "from zero - triggers",
retentionPeriod: defaultRetention,
prunedBefore: 0,
latest: defaultRetention + 1,
expected: pruneExpectation{called: true, arg: 1},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
actual := &pruneExpectation{}
l := &mockLayout{pruneBeforeFunc: actual.record}
pruner := &blobPruner{retentionPeriod: c.retentionPeriod}
pruner.prunedBefore.Store(uint64(c.prunedBefore))
done := pruner.notify(c.latest, l)
<-done
require.Equal(t, c.expected.called, actual.called)
require.Equal(t, c.expected.arg, actual.arg)
})
}
}
func testSetupBlobIdentPaths(t *testing.T, fs afero.Fs, bs *BlobStorage, idents []testIdent) []blobIdent {
created := make([]blobIdent, len(idents))
for i, id := range idents {
slot, err := slots.EpochStart(id.epoch)
require.NoError(t, err)
slot += id.offset
_, scs := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, slot, 1)
sc := verification.FakeVerifyForTest(t, scs[0])
require.NoError(t, bs.Save(sc))
ident := identForSidecar(sc)
_, err = fs.Stat(bs.layout.sszPath(ident))
require.NoError(t, err)
created[i] = ident
}
return created
}
func testAssertBlobsPruned(t *testing.T, fs afero.Fs, bs *BlobStorage, pruned, remain []blobIdent) {
for _, id := range pruned {
_, err := fs.Stat(bs.layout.sszPath(id))
require.NotNil(t, err)
require.Equal(t, true, os.IsNotExist(err))
}
for _, id := range remain {
_, err := fs.Stat(bs.layout.sszPath(id))
require.NoError(t, err)
}
}
type testIdent struct {
blobIdent
offset primitives.Slot
}
func testRoots(n int) [][32]byte {
roots := make([][32]byte, n)
for i := range roots {
binary.LittleEndian.PutUint32(roots[i][:], uint32(1+i))
}
return roots
}
func TestLayoutPruneBefore(t *testing.T) {
roots := testRoots(10)
cases := []struct {
name string
pruned []testIdent
remain []testIdent
pruneBefore primitives.Epoch
err error
sum pruneSummary
}{
{
name: "none pruned",
pruneBefore: 1,
pruned: []testIdent{},
remain: []testIdent{
{offset: 1, blobIdent: blobIdent{root: roots[0], epoch: 1, index: 0}},
{offset: 1, blobIdent: blobIdent{root: roots[1], epoch: 1, index: 0}},
},
},
{
name: "expected pruned before epoch",
pruneBefore: 3,
pruned: []testIdent{
{offset: 0, blobIdent: blobIdent{root: roots[0], epoch: 1, index: 0}},
{offset: 31, blobIdent: blobIdent{root: roots[1], epoch: 1, index: 5}},
{offset: 0, blobIdent: blobIdent{root: roots[2], epoch: 2, index: 0}},
{offset: 31, blobIdent: blobIdent{root: roots[3], epoch: 2, index: 3}},
},
remain: []testIdent{
{offset: 0, blobIdent: blobIdent{root: roots[4], epoch: 3, index: 2}}, // boundary
{offset: 31, blobIdent: blobIdent{root: roots[5], epoch: 3, index: 0}}, // boundary
{offset: 0, blobIdent: blobIdent{root: roots[6], epoch: 4, index: 1}},
{offset: 31, blobIdent: blobIdent{root: roots[7], epoch: 4, index: 5}},
},
sum: pruneSummary{blobsPruned: 4},
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
fs, bs := NewEphemeralBlobStorageAndFs(t, WithLayout(LayoutNameByEpoch))
pruned := testSetupBlobIdentPaths(t, fs, bs, c.pruned)
remain := testSetupBlobIdentPaths(t, fs, bs, c.remain)
sum, err := bs.layout.pruneBefore(c.pruneBefore)
if c.err != nil {
require.ErrorIs(t, err, c.err)
return
}
require.NoError(t, err)
testAssertBlobsPruned(t, fs, bs, pruned, remain)
require.Equal(t, c.sum.blobsPruned, sum.blobsPruned)
require.Equal(t, len(c.pruned), sum.blobsPruned)
require.Equal(t, len(c.sum.failedRemovals), len(sum.failedRemovals))
})
}
}

View File

@@ -103,6 +103,7 @@ go_test(
"//beacon-chain/core/helpers:go_default_library", "//beacon-chain/core/helpers:go_default_library",
"//beacon-chain/core/signing:go_default_library", "//beacon-chain/core/signing:go_default_library",
"//beacon-chain/db:go_default_library", "//beacon-chain/db:go_default_library",
"//beacon-chain/db/filesystem:go_default_library",
"//beacon-chain/db/testing:go_default_library", "//beacon-chain/db/testing:go_default_library",
"//beacon-chain/execution/testing:go_default_library", "//beacon-chain/execution/testing:go_default_library",
"//beacon-chain/execution/types:go_default_library", "//beacon-chain/execution/types:go_default_library",

View File

@@ -105,7 +105,7 @@ type Reconstructor interface {
ReconstructFullBellatrixBlockBatch( ReconstructFullBellatrixBlockBatch(
ctx context.Context, blindedBlocks []interfaces.ReadOnlySignedBeaconBlock, ctx context.Context, blindedBlocks []interfaces.ReadOnlySignedBeaconBlock,
) ([]interfaces.SignedBeaconBlock, error) ) ([]interfaces.SignedBeaconBlock, error)
ReconstructBlobSidecars(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, indices []bool) ([]blocks.VerifiedROBlob, error) ReconstructBlobSidecars(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, hi func(uint64) bool) ([]blocks.VerifiedROBlob, error)
} }
// EngineCaller defines a client that can interact with an Ethereum // EngineCaller defines a client that can interact with an Ethereum
@@ -531,31 +531,20 @@ func (s *Service) ReconstructFullBellatrixBlockBatch(
// It retrieves the KZG commitments from the block body, fetches the associated blobs and proofs, // It retrieves the KZG commitments from the block body, fetches the associated blobs and proofs,
// and constructs the corresponding verified read-only blob sidecars. // and constructs the corresponding verified read-only blob sidecars.
// //
// The 'exists' argument is a boolean list (must be the same length as body.BlobKzgCommitments), where each element corresponds to whether a // The 'hasIndex' argument is a function that returns true if the blob at the given uint64 index already exists on disk.
// particular blob sidecar already exists. If exists[i] is true, the blob for the i-th KZG commitment // Only the blobs that do not already exist (where hasIndex(i) is false)
// has already been retrieved and does not need to be fetched again from the execution layer (EL). // will be fetched from the execution engine using the KZG commitments from the block body.
// func (s *Service) ReconstructBlobSidecars(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, hasIndex func(uint64) bool) ([]blocks.VerifiedROBlob, error) {
// For example:
// - len(block.Body().BlobKzgCommitments()) == 6
// - If exists = [true, false, true, false, true, false], the function will fetch the blobs
// associated with indices 1, 3, and 5 (since those are marked as non-existent).
// - If exists = [false ... x 6], the function will attempt to fetch all blobs.
//
// Only the blobs that do not already exist (where exists[i] is false) are fetched using the KZG commitments from block body.
func (s *Service) ReconstructBlobSidecars(ctx context.Context, block interfaces.ReadOnlySignedBeaconBlock, blockRoot [32]byte, exists []bool) ([]blocks.VerifiedROBlob, error) {
blockBody := block.Block().Body() blockBody := block.Block().Body()
kzgCommitments, err := blockBody.BlobKzgCommitments() kzgCommitments, err := blockBody.BlobKzgCommitments()
if err != nil { if err != nil {
return nil, errors.Wrap(err, "could not get blob KZG commitments") return nil, errors.Wrap(err, "could not get blob KZG commitments")
} }
if len(kzgCommitments) > len(exists) {
return nil, fmt.Errorf("length of KZG commitments (%d) is greater than length of exists (%d)", len(kzgCommitments), len(exists))
}
// Collect KZG hashes for non-existing blobs // Collect KZG hashes for non-existing blobs
var kzgHashes []common.Hash var kzgHashes []common.Hash
for i, commitment := range kzgCommitments { for i, commitment := range kzgCommitments {
if !exists[i] { if !hasIndex(uint64(i)) {
kzgHashes = append(kzgHashes, primitives.ConvertKzgCommitmentToVersionedHash(commitment)) kzgHashes = append(kzgHashes, primitives.ConvertKzgCommitmentToVersionedHash(commitment))
} }
} }
@@ -580,7 +569,7 @@ func (s *Service) ReconstructBlobSidecars(ctx context.Context, block interfaces.
// Reconstruct verified blob sidecars // Reconstruct verified blob sidecars
var verifiedBlobs []blocks.VerifiedROBlob var verifiedBlobs []blocks.VerifiedROBlob
for i, blobIndex := 0, 0; i < len(kzgCommitments); i++ { for i, blobIndex := 0, 0; i < len(kzgCommitments); i++ {
if exists[i] { if hasIndex(uint64(i)) {
continue continue
} }
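Because the Reconstructor interface now takes a hasIndex callback rather than a []bool, callers can hand it the disk summary's HasIndex method directly. A minimal sketch of that wiring, assuming the *filesystem.BlobStorage and execution *Service from this patch (the helper name fetchMissingBlobs is illustrative, not part of the patch):

package example

import (
	"context"

	"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
	"github.com/prysmaticlabs/prysm/v5/beacon-chain/execution"
	"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
	"github.com/prysmaticlabs/prysm/v5/consensus-types/interfaces"
)

// fetchMissingBlobs asks the execution layer only for the blobs that are not
// already on disk. Summary returns a value (no error path), and its HasIndex
// method value is exactly the func(uint64) bool the new signature expects.
func fetchMissingBlobs(ctx context.Context, eng *execution.Service, store *filesystem.BlobStorage,
	blk interfaces.ReadOnlySignedBeaconBlock, root [32]byte) ([]blocks.VerifiedROBlob, error) {
	summary := store.Summary(root)
	return eng.ReconstructBlobSidecars(ctx, blk, root, summary.HasIndex)
}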

View File

@@ -20,6 +20,7 @@ import (
"github.com/ethereum/go-ethereum/rpc" "github.com/ethereum/go-ethereum/rpc"
"github.com/holiman/uint256" "github.com/holiman/uint256"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
mocks "github.com/prysmaticlabs/prysm/v5/beacon-chain/execution/testing" mocks "github.com/prysmaticlabs/prysm/v5/beacon-chain/execution/testing"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification" "github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams" fieldparams "github.com/prysmaticlabs/prysm/v5/config/fieldparams"
@@ -2395,6 +2396,12 @@ func Test_ExchangeCapabilities(t *testing.T) {
}) })
} }
func mockSummary(t *testing.T, exists []bool) func(uint64) bool {
hi, err := filesystem.NewBlobStorageSummary(params.BeaconConfig().DenebForkEpoch, exists)
require.NoError(t, err)
return hi.HasIndex
}
func TestReconstructBlobSidecars(t *testing.T) { func TestReconstructBlobSidecars(t *testing.T) {
client := &Service{capabilityCache: &capabilityCache{}} client := &Service{capabilityCache: &capabilityCache{}}
b := util.NewBeaconBlockDeneb() b := util.NewBeaconBlockDeneb()
@@ -2408,15 +2415,15 @@ func TestReconstructBlobSidecars(t *testing.T) {
ctx := context.Background() ctx := context.Background()
t.Run("all seen", func(t *testing.T) { t.Run("all seen", func(t *testing.T) {
exists := []bool{true, true, true, true, true, true} hi := mockSummary(t, []bool{true, true, true, true, true, true})
verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, exists) verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, hi)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, 0, len(verifiedBlobs)) require.Equal(t, 0, len(verifiedBlobs))
}) })
t.Run("get-blobs end point is not supported", func(t *testing.T) { t.Run("get-blobs end point is not supported", func(t *testing.T) {
exists := []bool{true, true, true, true, true, false} hi := mockSummary(t, []bool{true, true, true, true, true, false})
verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, exists) verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, hi)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, 0, len(verifiedBlobs)) require.Equal(t, 0, len(verifiedBlobs))
}) })
@@ -2430,8 +2437,8 @@ func TestReconstructBlobSidecars(t *testing.T) {
rpcClient, client := setupRpcClient(t, srv.URL, client) rpcClient, client := setupRpcClient(t, srv.URL, client)
defer rpcClient.Close() defer rpcClient.Close()
exists := [6]bool{} hi := mockSummary(t, make([]bool, 6))
verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, exists[:]) verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, hi)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, 6, len(verifiedBlobs)) require.Equal(t, 6, len(verifiedBlobs))
}) })
@@ -2443,23 +2450,11 @@ func TestReconstructBlobSidecars(t *testing.T) {
rpcClient, client := setupRpcClient(t, srv.URL, client) rpcClient, client := setupRpcClient(t, srv.URL, client)
defer rpcClient.Close() defer rpcClient.Close()
exists := []bool{true, false, true, false, true, false} hi := mockSummary(t, []bool{true, false, true, false, true, false})
verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, exists) verifiedBlobs, err := client.ReconstructBlobSidecars(ctx, sb, r, hi)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, 3, len(verifiedBlobs)) require.Equal(t, 3, len(verifiedBlobs))
}) })
t.Run("kzg is longer than exist", func(t *testing.T) {
srv := createBlobServer(t, 3)
defer srv.Close()
rpcClient, client := setupRpcClient(t, srv.URL, client)
defer rpcClient.Close()
exists := []bool{true, false, true, false, true}
_, err := client.ReconstructBlobSidecars(ctx, sb, r, exists)
require.ErrorContains(t, "length of KZG commitments (6) is greater than length of exists (5)", err)
})
} }
func createRandomKzgCommitments(t *testing.T, num int) [][]byte { func createRandomKzgCommitments(t *testing.T, num int) [][]byte {

View File

@@ -109,7 +109,7 @@ func (e *EngineClient) ReconstructFullBellatrixBlockBatch(
} }
// ReconstructBlobSidecars is a mock implementation of the ReconstructBlobSidecars method. // ReconstructBlobSidecars is a mock implementation of the ReconstructBlobSidecars method.
func (e *EngineClient) ReconstructBlobSidecars(context.Context, interfaces.ReadOnlySignedBeaconBlock, [32]byte, []bool) ([]blocks.VerifiedROBlob, error) { func (e *EngineClient) ReconstructBlobSidecars(context.Context, interfaces.ReadOnlySignedBeaconBlock, [32]byte, func(uint64) bool) ([]blocks.VerifiedROBlob, error) {
return e.BlobSidecars, e.ErrorBlobSidecars return e.BlobSidecars, e.ErrorBlobSidecars
} }

View File

@@ -42,8 +42,7 @@ func TestBlobs(t *testing.T) {
denebBlock, blobs := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 123, 4) denebBlock, blobs := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 123, 4)
require.NoError(t, db.SaveBlock(context.Background(), denebBlock)) require.NoError(t, db.SaveBlock(context.Background(), denebBlock))
bs := filesystem.NewEphemeralBlobStorage(t) bs := filesystem.NewEphemeralBlobStorage(t)
testSidecars, err := verification.BlobSidecarSliceNoop(blobs) testSidecars := verification.FakeVerifySliceForTest(t, blobs)
require.NoError(t, err)
for i := range testSidecars { for i := range testSidecars {
require.NoError(t, bs.Save(testSidecars[i])) require.NoError(t, bs.Save(testSidecars[i]))
} }
@@ -418,8 +417,7 @@ func TestBlobs_Electra(t *testing.T) {
electraBlock, blobs := util.GenerateTestElectraBlockWithSidecar(t, [32]byte{}, 123, params.BeaconConfig().MaxBlobsPerBlockByVersion(version.Electra)) electraBlock, blobs := util.GenerateTestElectraBlockWithSidecar(t, [32]byte{}, 123, params.BeaconConfig().MaxBlobsPerBlockByVersion(version.Electra))
require.NoError(t, db.SaveBlock(context.Background(), electraBlock)) require.NoError(t, db.SaveBlock(context.Background(), electraBlock))
bs := filesystem.NewEphemeralBlobStorage(t) bs := filesystem.NewEphemeralBlobStorage(t)
testSidecars, err := verification.BlobSidecarSliceNoop(blobs) testSidecars := verification.FakeVerifySliceForTest(t, blobs)
require.NoError(t, err)
for i := range testSidecars { for i := range testSidecars {
require.NoError(t, bs.Save(testSidecars[i])) require.NoError(t, bs.Save(testSidecars[i]))
} }

View File

@@ -235,19 +235,10 @@ func (p *BeaconDbBlocker) Blobs(ctx context.Context, id string, indices []uint64
return make([]*blocks.VerifiedROBlob, 0), nil return make([]*blocks.VerifiedROBlob, 0), nil
} }
if len(indices) == 0 { if len(indices) == 0 {
m, err := p.BlobStorage.Indices(bytesutil.ToBytes32(root), b.Block().Slot()) sum := p.BlobStorage.Summary(bytesutil.ToBytes32(root))
if err != nil { for i := range commitments {
log.WithFields(log.Fields{ if sum.HasIndex(uint64(i)) {
"blockRoot": hexutil.Encode(root), indices = append(indices, uint64(i))
}).Error(errors.Wrapf(err, "could not retrieve blob indices for root %#x", root))
return nil, &core.RpcError{Err: fmt.Errorf("could not retrieve blob indices for root %#x", root), Reason: core.Internal}
}
for k, v := range m {
if v {
if k >= len(commitments) {
return nil, &core.RpcError{Err: fmt.Errorf("blob index %d is more than blob kzg commitments :%dd", k, len(commitments)), Reason: core.BadRequest}
}
indices = append(indices, uint64(k))
} }
} }
} }

View File

@@ -166,9 +166,8 @@ func TestGetBlob(t *testing.T) {
db := testDB.SetupDB(t) db := testDB.SetupDB(t)
denebBlock, blobs := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 123, 4) denebBlock, blobs := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 123, 4)
require.NoError(t, db.SaveBlock(context.Background(), denebBlock)) require.NoError(t, db.SaveBlock(context.Background(), denebBlock))
_, bs := filesystem.NewEphemeralBlobStorageWithFs(t) _, bs := filesystem.NewEphemeralBlobStorageAndFs(t)
testSidecars, err := verification.BlobSidecarSliceNoop(blobs) testSidecars := verification.FakeVerifySliceForTest(t, blobs)
require.NoError(t, err)
for i := range testSidecars { for i := range testSidecars {
require.NoError(t, bs.Save(testSidecars[i])) require.NoError(t, bs.Save(testSidecars[i]))
} }

View File

@@ -250,8 +250,7 @@ func (c *blobsTestCase) run(t *testing.T) {
} }
} }
for _, blobSidecars := range m { for _, blobSidecars := range m {
v, err := verification.BlobSidecarSliceNoop(blobSidecars) v := verification.FakeVerifySliceForTest(t, blobSidecars)
require.NoError(t, err)
for i := range v { for i := range v {
require.NoError(t, s.cfg.blobStorage.Save(v[i])) require.NoError(t, s.cfg.blobStorage.Save(v[i]))
} }

View File

@@ -11,7 +11,6 @@ import (
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/core/transition" "github.com/prysmaticlabs/prysm/v5/beacon-chain/core/transition"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/das" "github.com/prysmaticlabs/prysm/v5/beacon-chain/das"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/sync" "github.com/prysmaticlabs/prysm/v5/beacon-chain/sync"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification" "github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks" "github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
@@ -70,14 +69,6 @@ func (s *Service) startBlocksQueue(ctx context.Context, highestSlot primitives.S
return nil, errors.Wrapf(err, "unable to initialize context version map using genesis validator root = %#x", vr) return nil, errors.Wrapf(err, "unable to initialize context version map using genesis validator root = %#x", vr)
} }
summarizer, err := s.cfg.BlobStorage.WaitForSummarizer(ctx)
if err != nil {
// The summarizer is an optional optimization, we can continue without, only stop if there is a different error.
if !errors.Is(err, filesystem.ErrBlobStorageSummarizerUnavailable) {
return nil, err
}
summarizer = nil // This should already be nil, but we'll set it just to be safe.
}
cfg := &blocksQueueConfig{ cfg := &blocksQueueConfig{
p2p: s.cfg.P2P, p2p: s.cfg.P2P,
db: s.cfg.DB, db: s.cfg.DB,
@@ -86,7 +77,7 @@ func (s *Service) startBlocksQueue(ctx context.Context, highestSlot primitives.S
ctxMap: ctxMap, ctxMap: ctxMap,
highestExpectedSlot: highestSlot, highestExpectedSlot: highestSlot,
mode: mode, mode: mode,
bs: summarizer, bs: s.cfg.BlobStorage,
} }
queue := newBlocksQueue(ctx, cfg) queue := newBlocksQueue(ctx, cfg)
if err := queue.start(); err != nil { if err := queue.start(); err != nil {

View File

@@ -292,13 +292,10 @@ func missingBlobRequest(blk blocks.ROBlock, store *filesystem.BlobStorage) (p2pt
if len(cmts) == 0 { if len(cmts) == 0 {
return nil, nil return nil, nil
} }
onDisk, err := store.Indices(r, blk.Block().Slot()) onDisk := store.Summary(r)
if err != nil {
return nil, errors.Wrapf(err, "error checking existing blobs for checkpoint sync block root %#x", r)
}
req := make(p2ptypes.BlobSidecarsByRootReq, 0, len(cmts)) req := make(p2ptypes.BlobSidecarsByRootReq, 0, len(cmts))
for i := range cmts { for i := range cmts {
if onDisk[i] { if onDisk.HasIndex(uint64(i)) {
continue continue
} }
req = append(req, &eth.BlobIdentifier{BlockRoot: r[:], Index: uint64(i)}) req = append(req, &eth.BlobIdentifier{BlockRoot: r[:], Index: uint64(i)})

View File

@@ -464,7 +464,7 @@ func TestMissingBlobRequest(t *testing.T) {
setup: func(t *testing.T) (blocks.ROBlock, *filesystem.BlobStorage) { setup: func(t *testing.T) (blocks.ROBlock, *filesystem.BlobStorage) {
bk, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 0, 2) bk, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 0, 2)
bm, fs := filesystem.NewEphemeralBlobStorageWithMocker(t) bm, fs := filesystem.NewEphemeralBlobStorageWithMocker(t)
require.NoError(t, bm.CreateFakeIndices(bk.Root(), 1)) require.NoError(t, bm.CreateFakeIndices(bk.Root(), bk.Block().Slot(), 1))
return bk, fs return bk, fs
}, },
nReq: 1, nReq: 1,
@@ -474,7 +474,7 @@ func TestMissingBlobRequest(t *testing.T) {
setup: func(t *testing.T) (blocks.ROBlock, *filesystem.BlobStorage) { setup: func(t *testing.T) (blocks.ROBlock, *filesystem.BlobStorage) {
bk, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 0, 2) bk, _ := util.GenerateTestDenebBlockWithSidecar(t, [32]byte{}, 0, 2)
bm, fs := filesystem.NewEphemeralBlobStorageWithMocker(t) bm, fs := filesystem.NewEphemeralBlobStorageWithMocker(t)
require.NoError(t, bm.CreateFakeIndices(bk.Root(), 0, 1)) require.NoError(t, bm.CreateFakeIndices(bk.Root(), bk.Block().Slot(), 0, 1))
return bk, fs return bk, fs
}, },
nReq: 0, nReq: 0,

View File

@@ -7,6 +7,7 @@ import (
libp2pcore "github.com/libp2p/go-libp2p/core" libp2pcore "github.com/libp2p/go-libp2p/core"
"github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/peer"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/execution" "github.com/prysmaticlabs/prysm/v5/beacon-chain/execution"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/p2p/types" "github.com/prysmaticlabs/prysm/v5/beacon-chain/p2p/types"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/sync/verify" "github.com/prysmaticlabs/prysm/v5/beacon-chain/sync/verify"
@@ -14,7 +15,6 @@ import (
"github.com/prysmaticlabs/prysm/v5/config/params" "github.com/prysmaticlabs/prysm/v5/config/params"
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks" "github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
"github.com/prysmaticlabs/prysm/v5/consensus-types/interfaces" "github.com/prysmaticlabs/prysm/v5/consensus-types/interfaces"
"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
eth "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1" eth "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1"
"github.com/prysmaticlabs/prysm/v5/runtime/version" "github.com/prysmaticlabs/prysm/v5/runtime/version"
"github.com/prysmaticlabs/prysm/v5/time/slots" "github.com/prysmaticlabs/prysm/v5/time/slots"
@@ -181,29 +181,26 @@ func (s *Service) pendingBlobsRequestForBlock(root [32]byte, b interfaces.ReadOn
if len(cc) == 0 { if len(cc) == 0 {
return nil, nil return nil, nil
} }
return s.constructPendingBlobsRequest(root, len(cc), b.Block().Slot()) return s.constructPendingBlobsRequest(root, len(cc))
} }
// constructPendingBlobsRequest creates a request for BlobSidecars by root, considering blobs already in DB. // constructPendingBlobsRequest creates a request for BlobSidecars by root, considering blobs already on disk.
func (s *Service) constructPendingBlobsRequest(root [32]byte, commitments int, slot primitives.Slot) (types.BlobSidecarsByRootReq, error) { func (s *Service) constructPendingBlobsRequest(root [32]byte, commitments int) (types.BlobSidecarsByRootReq, error) {
if commitments == 0 { if commitments == 0 {
return nil, nil return nil, nil
} }
stored, err := s.cfg.blobStorage.Indices(root, slot) summary := s.cfg.blobStorage.Summary(root)
if err != nil {
return nil, err
}
return requestsForMissingIndices(stored, commitments, root), nil return requestsForMissingIndices(summary, commitments, root), nil
} }
// requestsForMissingIndices constructs a slice of BlobIdentifiers that are missing from // requestsForMissingIndices constructs a slice of BlobIdentifiers that are missing from
// local storage, based on a mapping that represents which indices are locally stored, // local storage, based on a summary that reports which indices are locally stored,
// and the highest expected index. // and the highest expected index.
func requestsForMissingIndices(storedIndices []bool, commitments int, root [32]byte) []*eth.BlobIdentifier { func requestsForMissingIndices(stored filesystem.BlobStorageSummary, commitments int, root [32]byte) []*eth.BlobIdentifier {
var ids []*eth.BlobIdentifier var ids []*eth.BlobIdentifier
for i := uint64(0); i < uint64(commitments); i++ { for i := uint64(0); i < uint64(commitments); i++ {
if !storedIndices[i] { if !stored.HasIndex(i) {
ids = append(ids, &eth.BlobIdentifier{Index: i, BlockRoot: root[:]}) ids = append(ids, &eth.BlobIdentifier{Index: i, BlockRoot: root[:]})
} }
} }

View File

@@ -423,7 +423,7 @@ func TestConstructPendingBlobsRequest(t *testing.T) {
// No unknown indices. // No unknown indices.
root := [32]byte{1} root := [32]byte{1}
count := 3 count := 3
actual, err := s.constructPendingBlobsRequest(root, count, 100) actual, err := s.constructPendingBlobsRequest(root, count)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, 3, len(actual)) require.Equal(t, 3, len(actual))
for i, id := range actual { for i, id := range actual {
@@ -444,8 +444,7 @@ func TestConstructPendingBlobsRequest(t *testing.T) {
util.GenerateTestDenebBlobSidecar(t, root, header, 0, bytesutil.PadTo([]byte{}, 48), make([][]byte, 0)), util.GenerateTestDenebBlobSidecar(t, root, header, 0, bytesutil.PadTo([]byte{}, 48), make([][]byte, 0)),
util.GenerateTestDenebBlobSidecar(t, root, header, 2, bytesutil.PadTo([]byte{}, 48), make([][]byte, 0)), util.GenerateTestDenebBlobSidecar(t, root, header, 2, bytesutil.PadTo([]byte{}, 48), make([][]byte, 0)),
} }
vscs, err := verification.BlobSidecarSliceNoop(blobSidecars) vscs := verification.FakeVerifySliceForTest(t, blobSidecars)
require.NoError(t, err)
for i := range vscs { for i := range vscs {
require.NoError(t, bs.Save(vscs[i])) require.NoError(t, bs.Save(vscs[i]))
} }
@@ -453,15 +452,13 @@ func TestConstructPendingBlobsRequest(t *testing.T) {
expected := []*ethpb.BlobIdentifier{ expected := []*ethpb.BlobIdentifier{
{Index: 1, BlockRoot: root[:]}, {Index: 1, BlockRoot: root[:]},
} }
actual, err = s.constructPendingBlobsRequest(root, count, 100) actual, err = s.constructPendingBlobsRequest(root, count)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, expected[0].Index, actual[0].Index) require.Equal(t, expected[0].Index, actual[0].Index)
require.DeepEqual(t, expected[0].BlockRoot, actual[0].BlockRoot) require.DeepEqual(t, expected[0].BlockRoot, actual[0].BlockRoot)
} }
func TestFilterUnknownIndices(t *testing.T) { func TestFilterUnknownIndices(t *testing.T) {
haveIndices := []bool{true, true, true, false, false, false}
blockRoot := [32]byte{} blockRoot := [32]byte{}
count := 5 count := 5
@@ -470,7 +467,11 @@ func TestFilterUnknownIndices(t *testing.T) {
{Index: 4, BlockRoot: blockRoot[:]}, {Index: 4, BlockRoot: blockRoot[:]},
} }
actual := requestsForMissingIndices(haveIndices, count, blockRoot) sum, err := filesystem.NewBlobStorageSummary(
params.BeaconConfig().DenebForkEpoch,
[]bool{true, true, true, false, false, false})
require.NoError(t, err)
actual := requestsForMissingIndices(sum, count, blockRoot)
require.Equal(t, len(expected), len(actual)) require.Equal(t, len(expected), len(actual))
require.Equal(t, expected[0].Index, actual[0].Index) require.Equal(t, expected[0].Index, actual[0].Index)
require.DeepEqual(t, actual[0].BlockRoot, expected[0].BlockRoot) require.DeepEqual(t, actual[0].BlockRoot, expected[0].BlockRoot)

View File

@@ -27,14 +27,10 @@ func (s *Service) streamBlobBatch(ctx context.Context, batch blockBatch, wQuota
defer span.End() defer span.End()
for _, b := range batch.canonical() { for _, b := range batch.canonical() {
root := b.Root() root := b.Root()
idxs, err := s.cfg.blobStorage.Indices(b.Root(), b.Block().Slot()) idxs := s.cfg.blobStorage.Summary(root)
if err != nil { for i := range idxs.MaxBlobsForEpoch() {
s.writeErrorResponseToStream(responseCodeServerError, p2ptypes.ErrGeneric.Error(), stream)
return wQuota, errors.Wrapf(err, "could not retrieve sidecars for block root %#x", root)
}
for i, l := uint64(0), uint64(len(idxs)); i < l; i++ {
// index not available, skip // index not available, skip
if !idxs[i] { if !idxs.HasIndex(i) {
continue continue
} }
// We won't check for file not found since the Summary method should normally prevent that from happening.

View File

@@ -81,19 +81,20 @@ func (s *Service) reconstructAndBroadcastBlobs(ctx context.Context, block interf
if s.cfg.blobStorage == nil { if s.cfg.blobStorage == nil {
return return
} }
indices, err := s.cfg.blobStorage.Indices(blockRoot, block.Block().Slot()) summary := s.cfg.blobStorage.Summary(blockRoot)
cmts, err := block.Block().Body().BlobKzgCommitments()
if err != nil { if err != nil {
log.WithError(err).Error("Failed to retrieve indices for block") log.WithError(err).Error("Failed to read commitments from block")
return return
} }
for _, index := range indices { for i := range cmts {
if index { if summary.HasIndex(uint64(i)) {
blobExistedInDBTotal.Inc() blobExistedInDBTotal.Inc()
} }
} }
// Reconstruct blob sidecars from the EL // Reconstruct blob sidecars from the EL
blobSidecars, err := s.cfg.executionReconstructor.ReconstructBlobSidecars(ctx, block, blockRoot, indices) blobSidecars, err := s.cfg.executionReconstructor.ReconstructBlobSidecars(ctx, block, blockRoot, summary.HasIndex)
if err != nil { if err != nil {
log.WithError(err).Error("Failed to reconstruct blob sidecars") log.WithError(err).Error("Failed to reconstruct blob sidecars")
return return
@@ -103,15 +104,12 @@ func (s *Service) reconstructAndBroadcastBlobs(ctx context.Context, block interf
} }
// Refresh indices as new blobs may have been added to the db // Refresh the summary as new blobs may have been added to the db
indices, err = s.cfg.blobStorage.Indices(blockRoot, block.Block().Slot()) summary = s.cfg.blobStorage.Summary(blockRoot)
if err != nil {
log.WithError(err).Error("Failed to retrieve indices for block")
return
}
// Broadcast blob sidecars first than save them to the db // Broadcast blob sidecars before saving them to the db
for _, sidecar := range blobSidecars { for _, sidecar := range blobSidecars {
if sidecar.Index >= uint64(len(indices)) || indices[sidecar.Index] { // Don't broadcast the blob if it has appeared on disk.
if summary.HasIndex(sidecar.Index) {
continue continue
} }
if err := s.cfg.p2p.BroadcastBlob(ctx, sidecar.Index, sidecar.BlobSidecar); err != nil { if err := s.cfg.p2p.BroadcastBlob(ctx, sidecar.Index, sidecar.BlobSidecar); err != nil {
@@ -120,8 +118,7 @@ func (s *Service) reconstructAndBroadcastBlobs(ctx context.Context, block interf
} }
for _, sidecar := range blobSidecars { for _, sidecar := range blobSidecars {
if sidecar.Index >= uint64(len(indices)) || indices[sidecar.Index] { if summary.HasIndex(sidecar.Index) {
blobExistedInDBTotal.Inc()
continue continue
} }
if err := s.subscribeBlob(ctx, sidecar); err != nil { if err := s.subscribeBlob(ctx, sidecar); err != nil {

View File

@@ -8,6 +8,7 @@ go_library(
"cache.go", "cache.go",
"error.go", "error.go",
"fake.go", "fake.go",
"filesystem.go",
"initializer.go", "initializer.go",
"interface.go", "interface.go",
"log.go", "log.go",
@@ -40,6 +41,7 @@ go_library(
"@com_github_prometheus_client_golang//prometheus:go_default_library", "@com_github_prometheus_client_golang//prometheus:go_default_library",
"@com_github_prometheus_client_golang//prometheus/promauto:go_default_library", "@com_github_prometheus_client_golang//prometheus/promauto:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library", "@com_github_sirupsen_logrus//:go_default_library",
"@com_github_spf13_afero//:go_default_library",
], ],
) )

View File

@@ -1,6 +1,9 @@
package verification package verification
import "github.com/pkg/errors" import (
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
)
var ( var (
// ErrFromFutureSlot means RequireSlotNotTooEarly failed. // ErrFromFutureSlot means RequireSlotNotTooEarly failed.
@@ -35,6 +38,10 @@ var (
// ErrMissingVerification indicates that the given verification function was never performed on the value. // ErrMissingVerification indicates that the given verification function was never performed on the value.
ErrMissingVerification = errors.New("verification was not performed for requirement") ErrMissingVerification = errors.New("verification was not performed for requirement")
// errVerificationImplementationFault indicates that a code path yielding VerifiedROBlobs has an implementation
// error, leading it to call VerifiedROBlobError with a nil error.
errVerificationImplementationFault = errors.New("could not verify blob data or create a valid VerifiedROBlob")
) )
// VerificationMultiError is a custom error that can be used to access individual verification failures. // VerificationMultiError is a custom error that can be used to access individual verification failures.
@@ -68,3 +75,12 @@ func (ve VerificationMultiError) Failures() map[Requirement]error {
func newVerificationMultiError(r *results, err error) VerificationMultiError { func newVerificationMultiError(r *results, err error) VerificationMultiError {
return VerificationMultiError{r: r, err: err} return VerificationMultiError{r: r, err: err}
} }
// VerifiedROBlobError can be used by methods that have a VerifiedROBlob return type but do not have permission to
// create a value of that type in order to generate an error return value.
func VerifiedROBlobError(err error) (blocks.VerifiedROBlob, error) {
if err == nil {
return blocks.VerifiedROBlob{}, errVerificationImplementationFault
}
return blocks.VerifiedROBlob{}, err
}
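A sketch of the call pattern this helper enforces, written as if inside package verification (loadVerified is hypothetical): every failure is routed through VerifiedROBlobError, so a zero VerifiedROBlob can never travel alongside a nil error.

package verification

import "github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"

// loadVerified is a hypothetical wrapper showing the intended usage: any
// error is returned through VerifiedROBlobError, which substitutes
// errVerificationImplementationFault if a nil error slips through.
func loadVerified(load func() (blocks.ROBlob, error)) (blocks.VerifiedROBlob, error) {
	ro, err := load()
	if err != nil {
		return VerifiedROBlobError(err)
	}
	return blocks.NewVerifiedROBlob(ro), nil
}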

View File

@@ -6,22 +6,6 @@ import (
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks" "github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
) )
// BlobSidecarNoop is a FAKE verification function that simply launders a ROBlob->VerifiedROBlob.
// TODO: find all code that uses this method and replace it with full verification.
func BlobSidecarNoop(b blocks.ROBlob) (blocks.VerifiedROBlob, error) {
return blocks.NewVerifiedROBlob(b), nil
}
// BlobSidecarSliceNoop is a FAKE verification function that simply launders a ROBlob->VerifiedROBlob.
// TODO: find all code that uses this method and replace it with full verification.
func BlobSidecarSliceNoop(b []blocks.ROBlob) ([]blocks.VerifiedROBlob, error) {
vbs := make([]blocks.VerifiedROBlob, len(b))
for i := range b {
vbs[i] = blocks.NewVerifiedROBlob(b[i])
}
return vbs, nil
}
// FakeVerifyForTest can be used by tests that need a VerifiedROBlob but don't want to do all the // FakeVerifyForTest can be used by tests that need a VerifiedROBlob but don't want to do all the
// expensive set up to perform full validation. // expensive set up to perform full validation.
func FakeVerifyForTest(t *testing.T, b blocks.ROBlob) blocks.VerifiedROBlob { func FakeVerifyForTest(t *testing.T, b blocks.ROBlob) blocks.VerifiedROBlob {
@@ -35,7 +19,6 @@ func FakeVerifyForTest(t *testing.T, b blocks.ROBlob) blocks.VerifiedROBlob {
func FakeVerifySliceForTest(t *testing.T, b []blocks.ROBlob) []blocks.VerifiedROBlob { func FakeVerifySliceForTest(t *testing.T, b []blocks.ROBlob) []blocks.VerifiedROBlob {
// log so that t is truly required // log so that t is truly required
t.Log("producing fake []VerifiedROBlob for a test") t.Log("producing fake []VerifiedROBlob for a test")
// tautological assertion that ensures this function can only be used in tests.
vbs := make([]blocks.VerifiedROBlob, len(b)) vbs := make([]blocks.VerifiedROBlob, len(b))
for i := range b { for i := range b {
vbs[i] = blocks.NewVerifiedROBlob(b[i]) vbs[i] = blocks.NewVerifiedROBlob(b[i])

View File

@@ -0,0 +1,23 @@
package verification
import (
"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
ethpb "github.com/prysmaticlabs/prysm/v5/proto/prysm/v1alpha1"
"github.com/spf13/afero"
)
func VerifiedROBlobFromDisk(fs afero.Fs, root [32]byte, path string) (blocks.VerifiedROBlob, error) {
encoded, err := afero.ReadFile(fs, path)
if err != nil {
return VerifiedROBlobError(err)
}
s := &ethpb.BlobSidecar{}
if err := s.UnmarshalSSZ(encoded); err != nil {
return VerifiedROBlobError(err)
}
ro, err := blocks.NewROBlobWithRoot(s, root)
if err != nil {
return VerifiedROBlobError(err)
}
return blocks.NewVerifiedROBlob(ro), nil
}
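Usage is straightforward; a hedged sketch follows (readBlob and the path argument are illustrative). Note that the function launders stored bytes back into a VerifiedROBlob without re-running verification, so it must only be pointed at data this node previously verified and saved.

package example

import (
	"github.com/prysmaticlabs/prysm/v5/beacon-chain/verification"
	"github.com/prysmaticlabs/prysm/v5/consensus-types/blocks"
	"github.com/spf13/afero"
)

// readBlob rehydrates one sidecar: root is the block root parsed from the
// blob directory name, and path points at an "<index>.ssz" file inside it.
func readBlob(fs afero.Fs, root [32]byte, path string) (blocks.VerifiedROBlob, error) {
	return verification.VerifiedROBlobFromDisk(fs, root, path)
}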

View File

@@ -0,0 +1,2 @@
### Added
- New option to select an alternate blob storage layout. Rather than a flat directory with a subdir for each block root, a multi-level scheme organizes blobs by epoch/slot/root, enabling cheaper filesystem operations and faster indexing and pruning.

View File

@@ -145,6 +145,7 @@ var appFlags = []cli.Flag{
flags.JwtId, flags.JwtId,
storage.BlobStoragePathFlag, storage.BlobStoragePathFlag,
storage.BlobRetentionEpochFlag, storage.BlobRetentionEpochFlag,
storage.BlobStorageLayout,
bflags.EnableExperimentalBackfill, bflags.EnableExperimentalBackfill,
bflags.BackfillBatchSize, bflags.BackfillBatchSize,
bflags.BackfillWorkerCount, bflags.BackfillWorkerCount,

View File

@@ -2,6 +2,7 @@ package storage
import ( import (
"path" "path"
"strings"
"github.com/pkg/errors" "github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem" "github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
@@ -24,8 +25,30 @@ var (
Value: uint64(params.BeaconConfig().MinEpochsForBlobsSidecarsRequest), Value: uint64(params.BeaconConfig().MinEpochsForBlobsSidecarsRequest),
Aliases: []string{"extend-blob-retention-epoch"}, Aliases: []string{"extend-blob-retention-epoch"},
} }
BlobStorageLayout = &cli.StringFlag{
Name: "blob-storage-layout",
Usage: layoutFlagUsage(),
Value: filesystem.LayoutNameFlat,
}
) )
func layoutOptions() string {
return "available options are: " + strings.Join(filesystem.LayoutNames, ", ") + "."
}
func layoutFlagUsage() string {
return "Dictates how to organize the blob directory structure on disk, " + layoutOptions()
}
func validateLayoutFlag(_ *cli.Context, v string) error {
for _, l := range filesystem.LayoutNames {
if v == l {
return nil
}
}
return errors.Errorf("invalid value '%s' for flag --%s, %s", v, BlobStorageLayout.Name, layoutOptions())
}
// BeaconNodeOptions sets configuration values on the node.BeaconNode value at node startup. // BeaconNodeOptions sets configuration values on the node.BeaconNode value at node startup.
// Note: we can't get the right context from cli.Context, because the beacon node setup code uses this context to // Note: we can't get the right context from cli.Context, because the beacon node setup code uses this context to
// create a cancellable context. If we switch to using App.RunContext, we can set up this cancellation in the cmd // create a cancellable context. If we switch to using App.RunContext, we can set up this cancellation in the cmd
@@ -36,7 +59,9 @@ func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) {
return nil, err return nil, err
} }
opts := []node.Option{node.WithBlobStorageOptions( opts := []node.Option{node.WithBlobStorageOptions(
filesystem.WithBlobRetentionEpochs(e), filesystem.WithBasePath(blobStoragePath(c)), filesystem.WithBlobRetentionEpochs(e),
filesystem.WithBasePath(blobStoragePath(c)),
filesystem.WithLayout(c.String(BlobStorageLayout.Name)), // This is validated in the Action func for BlobStorageLayout.
)} )}
return opts, nil return opts, nil
} }
@@ -69,3 +94,7 @@ func blobRetentionEpoch(cliCtx *cli.Context) (primitives.Epoch, error) {
return re, nil return re, nil
} }
func init() {
BlobStorageLayout.Action = validateLayoutFlag
}
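Taken together, the flag plumbing reduces to three functional options on the blob storage. A sketch of the equivalent programmatic wiring (blobStorageOpts is illustrative; the layout string must be one of filesystem.LayoutNames, e.g. the default filesystem.LayoutNameFlat or filesystem.LayoutNameByEpoch):

package example

import (
	"github.com/prysmaticlabs/prysm/v5/beacon-chain/db/filesystem"
	"github.com/prysmaticlabs/prysm/v5/beacon-chain/node"
	"github.com/prysmaticlabs/prysm/v5/consensus-types/primitives"
)

// blobStorageOpts mirrors what BeaconNodeOptions assembles once the
// --blob-storage-layout value has passed validateLayoutFlag.
func blobStorageOpts(basePath string, retention primitives.Epoch, layout string) node.Option {
	return node.WithBlobStorageOptions(
		filesystem.WithBlobRetentionEpochs(retention),
		filesystem.WithBasePath(basePath),
		filesystem.WithLayout(layout),
	)
}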

View File

@@ -142,6 +142,7 @@ var appHelpFlagGroups = []flagGroup{
genesis.BeaconAPIURL, genesis.BeaconAPIURL,
storage.BlobStoragePathFlag, storage.BlobStoragePathFlag,
storage.BlobRetentionEpochFlag, storage.BlobRetentionEpochFlag,
storage.BlobStorageLayout,
backfill.EnableExperimentalBackfill, backfill.EnableExperimentalBackfill,
backfill.BackfillWorkerCount, backfill.BackfillWorkerCount,
backfill.BackfillBatchSize, backfill.BackfillBatchSize,

View File

@@ -403,6 +403,15 @@ func (b *BeaconChainConfig) MaxBlobsPerBlockByVersion(v int) int {
return b.DeprecatedMaxBlobsPerBlock return b.DeprecatedMaxBlobsPerBlock
} }
// MaxBlobsPerBlockAtEpoch returns the maximum number of blobs per block for the given epoch,
// adjusting for the Electra fork.
func (b *BeaconChainConfig) MaxBlobsPerBlockAtEpoch(epoch primitives.Epoch) int {
if epoch >= b.ElectraForkEpoch {
return b.DeprecatedMaxBlobsPerBlockElectra
}
return b.DeprecatedMaxBlobsPerBlock
}
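A small usage sketch of the new accessor, using the params package as elsewhere in this diff (assumes ElectraForkEpoch is nonzero so the subtraction below does not wrap):

package example

import "github.com/prysmaticlabs/prysm/v5/config/params"

// blobLimits returns the Deneb-era and Electra-era limits side by side:
// epochs before the Electra fork keep the smaller limit, while the fork
// epoch and everything after it use the Electra limit.
func blobLimits() (deneb, electra int) {
	cfg := params.BeaconConfig()
	return cfg.MaxBlobsPerBlockAtEpoch(cfg.ElectraForkEpoch - 1),
		cfg.MaxBlobsPerBlockAtEpoch(cfg.ElectraForkEpoch)
}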
// DenebEnabled centralizes the check to determine if code paths // DenebEnabled centralizes the check to determine if code paths
// that are specific to deneb should be allowed to execute. This will make it easier to find call sites that do this // that are specific to deneb should be allowed to execute. This will make it easier to find call sites that do this
// kind of check and remove them post-deneb. // kind of check and remove them post-deneb.