default new blob storage layouts to by-epoch (#15904)

* default new blob storage layouts to by-epoch

also, do not log migration message until we see a directory that needs to be migrated

Co-authored-by: Manu NALEPA <enalepa@offchainlabs.com>

* manu feedback

---------

Co-authored-by: Kasey Kirkham <kasey@users.noreply.github.com>
Co-authored-by: Manu NALEPA <enalepa@offchainlabs.com>
This commit is contained in:
kasey
2025-10-22 15:09:18 -05:00
committed by GitHub
parent 7dd4f5948c
commit 9c4774b82e
9 changed files with 213 additions and 24 deletions

View File

@@ -212,7 +212,8 @@ func filterNoop(_ string) bool {
return true
}
func isRootDir(p string) bool {
// IsBlockRootDir returns true if the path segment looks like a block root directory.
func IsBlockRootDir(p string) bool {
dir := filepath.Base(p)
return len(dir) == rootStringLen && strings.HasPrefix(dir, "0x")
}

View File

@@ -188,7 +188,7 @@ func TestListDir(t *testing.T) {
name: "root filter",
dirPath: ".",
expected: []string{childlessBlob.name, blobWithSsz.name, blobWithSszAndTmp.name},
filter: isRootDir,
filter: IsBlockRootDir,
},
{
name: "ssz filter",

View File

@@ -19,12 +19,14 @@ import (
const (
// Full root in directory will be 66 chars, eg:
// >>> len('0x0002fb4db510b8618b04dc82d023793739c26346a8b02eb73482e24b0fec0555') == 66
rootStringLen = 66
sszExt = "ssz"
partExt = "part"
periodicEpochBaseDir = "by-epoch"
rootStringLen = 66
sszExt = "ssz"
partExt = "part"
)
// PeriodicEpochBaseDir is the name of the base directory for the by-epoch layout.
const PeriodicEpochBaseDir = "by-epoch"
const (
LayoutNameFlat = "flat"
LayoutNameByEpoch = "by-epoch"
@@ -130,11 +132,11 @@ func migrateLayout(fs afero.Fs, from, to fsLayout, cache *blobStorageSummaryCach
if iter.atEOF() {
return errLayoutNotDetected
}
log.WithField("fromLayout", from.name()).WithField("toLayout", to.name()).Info("Migrating blob filesystem layout. This one-time operation can take extra time (up to a few minutes for systems with extended blob storage and a cold disk cache).")
lastMoved := ""
parentDirs := make(map[string]bool) // this map should have < 65k keys by design
moved := 0
dc := newDirCleaner()
migrationLogged := false
for ident, err := iter.next(); !errors.Is(err, io.EOF); ident, err = iter.next() {
if err != nil {
if errors.Is(err, errIdentFailure) {
@@ -146,6 +148,11 @@ func migrateLayout(fs afero.Fs, from, to fsLayout, cache *blobStorageSummaryCach
}
return errors.Wrapf(errMigrationFailure, "failed to iterate previous layout structure while migrating blobs, err=%s", err.Error())
}
if !migrationLogged {
log.WithField("fromLayout", from.name()).WithField("toLayout", to.name()).
Info("Migrating blob filesystem layout. This one-time operation can take extra time (up to a few minutes for systems with extended blob storage and a cold disk cache).")
migrationLogged = true
}
src := from.dir(ident)
target := to.dir(ident)
if src != lastMoved {

View File

@@ -34,7 +34,7 @@ func (l *periodicEpochLayout) name() string {
func (l *periodicEpochLayout) blockParentDirs(ident blobIdent) []string {
return []string{
periodicEpochBaseDir,
PeriodicEpochBaseDir,
l.periodDir(ident.epoch),
l.epochDir(ident.epoch),
}
@@ -50,28 +50,28 @@ func (l *periodicEpochLayout) notify(ident blobIdent) error {
// If before == 0, it won't be used as a filter and all idents will be returned.
func (l *periodicEpochLayout) iterateIdents(before primitives.Epoch) (*identIterator, error) {
_, err := l.fs.Stat(periodicEpochBaseDir)
_, err := l.fs.Stat(PeriodicEpochBaseDir)
if err != nil {
if os.IsNotExist(err) {
return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration.
}
return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir)
return nil, errors.Wrapf(err, "error reading path %s", PeriodicEpochBaseDir)
}
// iterate root, which should have directories named by "period"
entries, err := listDir(l.fs, periodicEpochBaseDir)
entries, err := listDir(l.fs, PeriodicEpochBaseDir)
if err != nil {
return nil, errors.Wrapf(err, "failed to list %s", periodicEpochBaseDir)
return nil, errors.Wrapf(err, "failed to list %s", PeriodicEpochBaseDir)
}
return &identIterator{
fs: l.fs,
path: periodicEpochBaseDir,
path: PeriodicEpochBaseDir,
// Please see comments on the `layers` field in `identIterator`` if the role of the layers is unclear.
layers: []layoutLayer{
{populateIdent: populateNoop, filter: isBeforePeriod(before)},
{populateIdent: populateEpoch, filter: isBeforeEpoch(before)},
{populateIdent: populateRoot, filter: isRootDir}, // extract root from path
{populateIdent: populateIndex, filter: isSszFile}, // extract index from filename
{populateIdent: populateRoot, filter: IsBlockRootDir}, // extract root from path
{populateIdent: populateIndex, filter: isSszFile}, // extract index from filename
},
entries: entries,
}, nil
@@ -98,7 +98,7 @@ func (l *periodicEpochLayout) epochDir(epoch primitives.Epoch) string {
}
func (l *periodicEpochLayout) periodDir(epoch primitives.Epoch) string {
return filepath.Join(periodicEpochBaseDir, fmt.Sprintf("%d", periodForEpoch(epoch)))
return filepath.Join(PeriodicEpochBaseDir, fmt.Sprintf("%d", periodForEpoch(epoch)))
}
func (l *periodicEpochLayout) sszPath(n blobIdent) string {

View File

@@ -30,7 +30,7 @@ func (l *flatLayout) iterateIdents(before primitives.Epoch) (*identIterator, err
if os.IsNotExist(err) {
return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration.
}
return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir)
return nil, errors.Wrap(err, "error reading blob base dir")
}
entries, err := listDir(l.fs, ".")
if err != nil {
@@ -199,10 +199,10 @@ func (l *flatSlotReader) isSSZAndBefore(fname string) bool {
// the epoch can be determined.
func isFlatCachedAndBefore(cache *blobStorageSummaryCache, before primitives.Epoch) func(string) bool {
if before == 0 {
return isRootDir
return IsBlockRootDir
}
return func(p string) bool {
if !isRootDir(p) {
if !IsBlockRootDir(p) {
return false
}
root, err := rootFromPath(p)

View File

@@ -0,0 +1,2 @@
### Changed
- Use the `by-epoch' blob storage layout by default and log a warning to users who continue to use the flat layout, encouraging them to switch.

View File

@@ -12,6 +12,7 @@ go_library(
"//config/params:go_default_library",
"//consensus-types/primitives:go_default_library",
"@com_github_pkg_errors//:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library",
"@com_github_urfave_cli_v2//:go_default_library",
],
)
@@ -19,8 +20,10 @@ go_library(
go_test(
name = "go_default_test",
srcs = ["options_test.go"],
data = glob(["testdata/**"]),
embed = [":go_default_library"],
deps = [
"//beacon-chain/db/filesystem:go_default_library",
"//cmd:go_default_library",
"//config/params:go_default_library",
"//consensus-types/primitives:go_default_library",

View File

@@ -1,6 +1,8 @@
package storage
import (
"fmt"
"os"
"path"
"strings"
@@ -10,6 +12,7 @@ import (
"github.com/OffchainLabs/prysm/v6/config/params"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
"github.com/pkg/errors"
log "github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
@@ -25,9 +28,9 @@ var (
Aliases: []string{"extend-blob-retention-epoch"},
}
BlobStorageLayout = &cli.StringFlag{
Name: "blob-storage-layout",
Usage: layoutFlagUsage(),
Value: filesystem.LayoutNameFlat,
Name: "blob-storage-layout",
Usage: layoutFlagUsage(),
DefaultText: fmt.Sprintf("\"%s\", unless a different existing layout is detected", filesystem.LayoutNameByEpoch),
}
DataColumnStoragePathFlag = &cli.PathFlag{
Name: "data-column-path",
@@ -35,6 +38,14 @@ var (
}
)
// Flags is the list of CLI flags for configuring blob storage.
var Flags = []cli.Flag{
BlobStoragePathFlag,
BlobRetentionEpochFlag,
BlobStorageLayout,
DataColumnStoragePathFlag,
}
func layoutOptions() string {
return "available options are: " + strings.Join(filesystem.LayoutNames, ", ") + "."
}
@@ -62,10 +73,20 @@ func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) {
return nil, errors.Wrap(err, "blob retention epoch")
}
blobPath := blobStoragePath(c)
layout, err := detectLayout(blobPath, c)
if err != nil {
return nil, errors.Wrap(err, "detecting blob storage layout")
}
if layout == filesystem.LayoutNameFlat {
log.Warnf("Existing '%s' blob storage layout detected. Consider setting the flag --%s=%s for faster startup and more reliable pruning. Setting this flag will automatically migrate your existing blob storage to the newer layout on the next restart.",
filesystem.LayoutNameFlat, BlobStorageLayout.Name, filesystem.LayoutNameByEpoch)
}
blobStorageOptions := node.WithBlobStorageOptions(
filesystem.WithBlobRetentionEpochs(blobRetentionEpoch),
filesystem.WithBasePath(blobStoragePath(c)),
filesystem.WithLayout(c.String(BlobStorageLayout.Name)), // This is validated in the Action func for BlobStorageLayout.
filesystem.WithBasePath(blobPath),
filesystem.WithLayout(layout), // This is validated in the Action func for BlobStorageLayout.
)
dataColumnRetentionEpoch, err := dataColumnRetentionEpoch(c)
@@ -82,6 +103,53 @@ func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) {
return opts, nil
}
// stringFlagGetter makes testing detectLayout easier
// because we don't need to mess with FlagSets and cli types.
type stringFlagGetter interface {
String(name string) string
}
// detectLayout determines which layout to use based on explicit user flags or by probing the
// blob directory to determine the previously used layout.
// - explicit: If the user has specified a layout flag, that layout is returned.
// - flat: If directories that look like flat layout's block root paths are present.
// - by-epoch: default if neither of the above is true.
func detectLayout(dir string, c stringFlagGetter) (string, error) {
explicit := c.String(BlobStorageLayout.Name)
if explicit != "" {
return explicit, nil
}
dir = path.Clean(dir)
// nosec: this path is provided by the node operator via flag
base, err := os.Open(dir) // #nosec G304
if err != nil {
// 'blobs' directory does not exist yet, so default to by-epoch.
return filesystem.LayoutNameByEpoch, nil
}
defer func() {
if err := base.Close(); err != nil {
log.WithError(err).Errorf("Could not close blob storage directory")
}
}()
// When we go looking for existing by-root directories, we only need to find one directory
// but one of those directories could be the `by-epoch` layout's top-level directory,
// and it seems possible that on some platforms we could get extra system directories that I don't
// know how to anticipate (looking at you, Windows), so I picked 16 as a small number with a generous
// amount of wiggle room to be confident that we'll likely see a by-root director if one exists.
entries, err := base.Readdirnames(16)
if err != nil {
return "", errors.Wrap(err, "reading blob storage directory")
}
for _, entry := range entries {
if filesystem.IsBlockRootDir(entry) {
return filesystem.LayoutNameFlat, nil
}
}
return filesystem.LayoutNameByEpoch, nil
}
func blobStoragePath(c *cli.Context) string {
blobsPath := c.Path(BlobStoragePathFlag.Name)
if blobsPath == "" {

View File

@@ -3,8 +3,14 @@ package storage
import (
"flag"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"syscall"
"testing"
"github.com/OffchainLabs/prysm/v6/beacon-chain/db/filesystem"
"github.com/OffchainLabs/prysm/v6/cmd"
"github.com/OffchainLabs/prysm/v6/config/params"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
@@ -109,3 +115,105 @@ func TestDataColumnStoragePath_FlagSpecified(t *testing.T) {
assert.Equal(t, "/blah/blah", storagePath)
}
type mockStringFlagGetter struct {
v string
}
func (m mockStringFlagGetter) String(name string) string {
return m.v
}
func TestDetectLayout(t *testing.T) {
fakeRoot := "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890"
require.Equal(t, true, filesystem.IsBlockRootDir(fakeRoot))
withFlatRoot := func(t *testing.T, dir string) {
require.NoError(t, os.MkdirAll(path.Join(dir, fakeRoot), 0o755))
}
withByEpoch := func(t *testing.T, dir string) {
require.NoError(t, os.MkdirAll(path.Join(dir, filesystem.PeriodicEpochBaseDir), 0o755))
}
cases := []struct {
name string
expected string
expectedErr error
setup func(t *testing.T, dir string)
getter mockStringFlagGetter
}{
{
name: "no blobs dir",
expected: filesystem.LayoutNameByEpoch,
},
{
name: "blobs dir without root dirs",
expected: filesystem.LayoutNameByEpoch,
// empty subdirectory under blobs which doesn't match the block root pattern
setup: func(t *testing.T, dir string) {
require.NoError(t, os.MkdirAll(path.Join(dir, "some-dir"), 0o755))
},
},
{
name: "blobs dir with root dir",
setup: withFlatRoot,
expected: filesystem.LayoutNameFlat,
},
{
name: "blobs dir with root dir overridden by flag",
setup: withFlatRoot,
expected: filesystem.LayoutNameByEpoch,
getter: mockStringFlagGetter{v: filesystem.LayoutNameByEpoch},
},
{
name: "only has by-epoch dir",
setup: withByEpoch,
expected: filesystem.LayoutNameByEpoch,
},
{
name: "contains by-epoch dir and root dirs",
setup: func(t *testing.T, dir string) {
withFlatRoot(t, dir)
withByEpoch(t, dir)
},
expected: filesystem.LayoutNameFlat,
},
{
name: "unreadable dir",
// It isn't detectLayout's job to detect any errors reading the directory,
// so it ignores errors from the os.Open call. But we can also get errors
// from readdirnames, but this is hard to simulate in a test. So in the test
// write a file in place of the dir, which will succeed in the Open call, but
// fail when read as a directory. This is why the expected error is syscall.ENOTDIR
// (syscall error code from using readdirnames syscall on an ordinary file).
setup: func(t *testing.T, dir string) {
parent := filepath.Dir(dir)
require.NoError(t, os.MkdirAll(parent, 0o755))
require.NoError(t, os.WriteFile(dir, []byte{}, 0o755))
},
expectedErr: syscall.ENOTDIR,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
dir := strings.Replace(t.Name(), " ", "_", -1)
dir = path.Join(os.TempDir(), dir)
if tc.setup != nil {
tc.setup(t, dir)
}
if tc.expectedErr != nil {
t.Log("hi")
}
layout, err := detectLayout(dir, tc.getter)
if tc.expectedErr != nil {
require.ErrorIs(t, err, tc.expectedErr)
return
}
require.NoError(t, err)
require.Equal(t, tc.expected, layout)
assert.Equal(t, tc.expectedErr, err)
assert.Equal(t, tc.expected, layout)
})
}
}