From 9c4774b82ede85c184cbbbc235b000fd9dd1be12 Mon Sep 17 00:00:00 2001 From: kasey <489222+kasey@users.noreply.github.com> Date: Wed, 22 Oct 2025 15:09:18 -0500 Subject: [PATCH] default new blob storage layouts to by-epoch (#15904) * default new blob storage layouts to by-epoch also, do not log migration message until we see a directory that needs to be migrated Co-authored-by: Manu NALEPA * manu feedback --------- Co-authored-by: Kasey Kirkham Co-authored-by: Manu NALEPA --- beacon-chain/db/filesystem/iteration.go | 3 +- beacon-chain/db/filesystem/iteration_test.go | 2 +- beacon-chain/db/filesystem/layout.go | 17 ++- beacon-chain/db/filesystem/layout_by_epoch.go | 18 +-- beacon-chain/db/filesystem/layout_flat.go | 6 +- changelog/kasey_default-layout-by-epoch.md | 2 + cmd/beacon-chain/storage/BUILD.bazel | 3 + cmd/beacon-chain/storage/options.go | 78 ++++++++++++- cmd/beacon-chain/storage/options_test.go | 108 ++++++++++++++++++ 9 files changed, 213 insertions(+), 24 deletions(-) create mode 100644 changelog/kasey_default-layout-by-epoch.md diff --git a/beacon-chain/db/filesystem/iteration.go b/beacon-chain/db/filesystem/iteration.go index fed8f187e6..3cadf8d8ae 100644 --- a/beacon-chain/db/filesystem/iteration.go +++ b/beacon-chain/db/filesystem/iteration.go @@ -212,7 +212,8 @@ func filterNoop(_ string) bool { return true } -func isRootDir(p string) bool { +// IsBlockRootDir returns true if the path segment looks like a block root directory. +func IsBlockRootDir(p string) bool { dir := filepath.Base(p) return len(dir) == rootStringLen && strings.HasPrefix(dir, "0x") } diff --git a/beacon-chain/db/filesystem/iteration_test.go b/beacon-chain/db/filesystem/iteration_test.go index 25acd897a3..e2a44a17d0 100644 --- a/beacon-chain/db/filesystem/iteration_test.go +++ b/beacon-chain/db/filesystem/iteration_test.go @@ -188,7 +188,7 @@ func TestListDir(t *testing.T) { name: "root filter", dirPath: ".", expected: []string{childlessBlob.name, blobWithSsz.name, blobWithSszAndTmp.name}, - filter: isRootDir, + filter: IsBlockRootDir, }, { name: "ssz filter", diff --git a/beacon-chain/db/filesystem/layout.go b/beacon-chain/db/filesystem/layout.go index ab25e125b9..b3f3782637 100644 --- a/beacon-chain/db/filesystem/layout.go +++ b/beacon-chain/db/filesystem/layout.go @@ -19,12 +19,14 @@ import ( const ( // Full root in directory will be 66 chars, eg: // >>> len('0x0002fb4db510b8618b04dc82d023793739c26346a8b02eb73482e24b0fec0555') == 66 - rootStringLen = 66 - sszExt = "ssz" - partExt = "part" - periodicEpochBaseDir = "by-epoch" + rootStringLen = 66 + sszExt = "ssz" + partExt = "part" ) +// PeriodicEpochBaseDir is the name of the base directory for the by-epoch layout. +const PeriodicEpochBaseDir = "by-epoch" + const ( LayoutNameFlat = "flat" LayoutNameByEpoch = "by-epoch" @@ -130,11 +132,11 @@ func migrateLayout(fs afero.Fs, from, to fsLayout, cache *blobStorageSummaryCach if iter.atEOF() { return errLayoutNotDetected } - log.WithField("fromLayout", from.name()).WithField("toLayout", to.name()).Info("Migrating blob filesystem layout. This one-time operation can take extra time (up to a few minutes for systems with extended blob storage and a cold disk cache).") lastMoved := "" parentDirs := make(map[string]bool) // this map should have < 65k keys by design moved := 0 dc := newDirCleaner() + migrationLogged := false for ident, err := iter.next(); !errors.Is(err, io.EOF); ident, err = iter.next() { if err != nil { if errors.Is(err, errIdentFailure) { @@ -146,6 +148,11 @@ func migrateLayout(fs afero.Fs, from, to fsLayout, cache *blobStorageSummaryCach } return errors.Wrapf(errMigrationFailure, "failed to iterate previous layout structure while migrating blobs, err=%s", err.Error()) } + if !migrationLogged { + log.WithField("fromLayout", from.name()).WithField("toLayout", to.name()). + Info("Migrating blob filesystem layout. This one-time operation can take extra time (up to a few minutes for systems with extended blob storage and a cold disk cache).") + migrationLogged = true + } src := from.dir(ident) target := to.dir(ident) if src != lastMoved { diff --git a/beacon-chain/db/filesystem/layout_by_epoch.go b/beacon-chain/db/filesystem/layout_by_epoch.go index 08f28cb1dd..3e6adb4c93 100644 --- a/beacon-chain/db/filesystem/layout_by_epoch.go +++ b/beacon-chain/db/filesystem/layout_by_epoch.go @@ -34,7 +34,7 @@ func (l *periodicEpochLayout) name() string { func (l *periodicEpochLayout) blockParentDirs(ident blobIdent) []string { return []string{ - periodicEpochBaseDir, + PeriodicEpochBaseDir, l.periodDir(ident.epoch), l.epochDir(ident.epoch), } @@ -50,28 +50,28 @@ func (l *periodicEpochLayout) notify(ident blobIdent) error { // If before == 0, it won't be used as a filter and all idents will be returned. func (l *periodicEpochLayout) iterateIdents(before primitives.Epoch) (*identIterator, error) { - _, err := l.fs.Stat(periodicEpochBaseDir) + _, err := l.fs.Stat(PeriodicEpochBaseDir) if err != nil { if os.IsNotExist(err) { return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration. } - return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir) + return nil, errors.Wrapf(err, "error reading path %s", PeriodicEpochBaseDir) } // iterate root, which should have directories named by "period" - entries, err := listDir(l.fs, periodicEpochBaseDir) + entries, err := listDir(l.fs, PeriodicEpochBaseDir) if err != nil { - return nil, errors.Wrapf(err, "failed to list %s", periodicEpochBaseDir) + return nil, errors.Wrapf(err, "failed to list %s", PeriodicEpochBaseDir) } return &identIterator{ fs: l.fs, - path: periodicEpochBaseDir, + path: PeriodicEpochBaseDir, // Please see comments on the `layers` field in `identIterator`` if the role of the layers is unclear. layers: []layoutLayer{ {populateIdent: populateNoop, filter: isBeforePeriod(before)}, {populateIdent: populateEpoch, filter: isBeforeEpoch(before)}, - {populateIdent: populateRoot, filter: isRootDir}, // extract root from path - {populateIdent: populateIndex, filter: isSszFile}, // extract index from filename + {populateIdent: populateRoot, filter: IsBlockRootDir}, // extract root from path + {populateIdent: populateIndex, filter: isSszFile}, // extract index from filename }, entries: entries, }, nil @@ -98,7 +98,7 @@ func (l *periodicEpochLayout) epochDir(epoch primitives.Epoch) string { } func (l *periodicEpochLayout) periodDir(epoch primitives.Epoch) string { - return filepath.Join(periodicEpochBaseDir, fmt.Sprintf("%d", periodForEpoch(epoch))) + return filepath.Join(PeriodicEpochBaseDir, fmt.Sprintf("%d", periodForEpoch(epoch))) } func (l *periodicEpochLayout) sszPath(n blobIdent) string { diff --git a/beacon-chain/db/filesystem/layout_flat.go b/beacon-chain/db/filesystem/layout_flat.go index 5da28711d7..3206082ddf 100644 --- a/beacon-chain/db/filesystem/layout_flat.go +++ b/beacon-chain/db/filesystem/layout_flat.go @@ -30,7 +30,7 @@ func (l *flatLayout) iterateIdents(before primitives.Epoch) (*identIterator, err if os.IsNotExist(err) { return &identIterator{eof: true}, nil // The directory is non-existent, which is fine; stop iteration. } - return nil, errors.Wrapf(err, "error reading path %s", periodicEpochBaseDir) + return nil, errors.Wrap(err, "error reading blob base dir") } entries, err := listDir(l.fs, ".") if err != nil { @@ -199,10 +199,10 @@ func (l *flatSlotReader) isSSZAndBefore(fname string) bool { // the epoch can be determined. func isFlatCachedAndBefore(cache *blobStorageSummaryCache, before primitives.Epoch) func(string) bool { if before == 0 { - return isRootDir + return IsBlockRootDir } return func(p string) bool { - if !isRootDir(p) { + if !IsBlockRootDir(p) { return false } root, err := rootFromPath(p) diff --git a/changelog/kasey_default-layout-by-epoch.md b/changelog/kasey_default-layout-by-epoch.md new file mode 100644 index 0000000000..f467dbf4f0 --- /dev/null +++ b/changelog/kasey_default-layout-by-epoch.md @@ -0,0 +1,2 @@ +### Changed +- Use the `by-epoch' blob storage layout by default and log a warning to users who continue to use the flat layout, encouraging them to switch. diff --git a/cmd/beacon-chain/storage/BUILD.bazel b/cmd/beacon-chain/storage/BUILD.bazel index 63d0d03ef2..c966205507 100644 --- a/cmd/beacon-chain/storage/BUILD.bazel +++ b/cmd/beacon-chain/storage/BUILD.bazel @@ -12,6 +12,7 @@ go_library( "//config/params:go_default_library", "//consensus-types/primitives:go_default_library", "@com_github_pkg_errors//:go_default_library", + "@com_github_sirupsen_logrus//:go_default_library", "@com_github_urfave_cli_v2//:go_default_library", ], ) @@ -19,8 +20,10 @@ go_library( go_test( name = "go_default_test", srcs = ["options_test.go"], + data = glob(["testdata/**"]), embed = [":go_default_library"], deps = [ + "//beacon-chain/db/filesystem:go_default_library", "//cmd:go_default_library", "//config/params:go_default_library", "//consensus-types/primitives:go_default_library", diff --git a/cmd/beacon-chain/storage/options.go b/cmd/beacon-chain/storage/options.go index 598ef99a86..e930c30aae 100644 --- a/cmd/beacon-chain/storage/options.go +++ b/cmd/beacon-chain/storage/options.go @@ -1,6 +1,8 @@ package storage import ( + "fmt" + "os" "path" "strings" @@ -10,6 +12,7 @@ import ( "github.com/OffchainLabs/prysm/v6/config/params" "github.com/OffchainLabs/prysm/v6/consensus-types/primitives" "github.com/pkg/errors" + log "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" ) @@ -25,9 +28,9 @@ var ( Aliases: []string{"extend-blob-retention-epoch"}, } BlobStorageLayout = &cli.StringFlag{ - Name: "blob-storage-layout", - Usage: layoutFlagUsage(), - Value: filesystem.LayoutNameFlat, + Name: "blob-storage-layout", + Usage: layoutFlagUsage(), + DefaultText: fmt.Sprintf("\"%s\", unless a different existing layout is detected", filesystem.LayoutNameByEpoch), } DataColumnStoragePathFlag = &cli.PathFlag{ Name: "data-column-path", @@ -35,6 +38,14 @@ var ( } ) +// Flags is the list of CLI flags for configuring blob storage. +var Flags = []cli.Flag{ + BlobStoragePathFlag, + BlobRetentionEpochFlag, + BlobStorageLayout, + DataColumnStoragePathFlag, +} + func layoutOptions() string { return "available options are: " + strings.Join(filesystem.LayoutNames, ", ") + "." } @@ -62,10 +73,20 @@ func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) { return nil, errors.Wrap(err, "blob retention epoch") } + blobPath := blobStoragePath(c) + layout, err := detectLayout(blobPath, c) + if err != nil { + return nil, errors.Wrap(err, "detecting blob storage layout") + } + if layout == filesystem.LayoutNameFlat { + log.Warnf("Existing '%s' blob storage layout detected. Consider setting the flag --%s=%s for faster startup and more reliable pruning. Setting this flag will automatically migrate your existing blob storage to the newer layout on the next restart.", + + filesystem.LayoutNameFlat, BlobStorageLayout.Name, filesystem.LayoutNameByEpoch) + } blobStorageOptions := node.WithBlobStorageOptions( filesystem.WithBlobRetentionEpochs(blobRetentionEpoch), - filesystem.WithBasePath(blobStoragePath(c)), - filesystem.WithLayout(c.String(BlobStorageLayout.Name)), // This is validated in the Action func for BlobStorageLayout. + filesystem.WithBasePath(blobPath), + filesystem.WithLayout(layout), // This is validated in the Action func for BlobStorageLayout. ) dataColumnRetentionEpoch, err := dataColumnRetentionEpoch(c) @@ -82,6 +103,53 @@ func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) { return opts, nil } +// stringFlagGetter makes testing detectLayout easier +// because we don't need to mess with FlagSets and cli types. +type stringFlagGetter interface { + String(name string) string +} + +// detectLayout determines which layout to use based on explicit user flags or by probing the +// blob directory to determine the previously used layout. +// - explicit: If the user has specified a layout flag, that layout is returned. +// - flat: If directories that look like flat layout's block root paths are present. +// - by-epoch: default if neither of the above is true. +func detectLayout(dir string, c stringFlagGetter) (string, error) { + explicit := c.String(BlobStorageLayout.Name) + if explicit != "" { + return explicit, nil + } + + dir = path.Clean(dir) + // nosec: this path is provided by the node operator via flag + base, err := os.Open(dir) // #nosec G304 + if err != nil { + // 'blobs' directory does not exist yet, so default to by-epoch. + return filesystem.LayoutNameByEpoch, nil + } + defer func() { + if err := base.Close(); err != nil { + log.WithError(err).Errorf("Could not close blob storage directory") + } + }() + + // When we go looking for existing by-root directories, we only need to find one directory + // but one of those directories could be the `by-epoch` layout's top-level directory, + // and it seems possible that on some platforms we could get extra system directories that I don't + // know how to anticipate (looking at you, Windows), so I picked 16 as a small number with a generous + // amount of wiggle room to be confident that we'll likely see a by-root director if one exists. + entries, err := base.Readdirnames(16) + if err != nil { + return "", errors.Wrap(err, "reading blob storage directory") + } + for _, entry := range entries { + if filesystem.IsBlockRootDir(entry) { + return filesystem.LayoutNameFlat, nil + } + } + return filesystem.LayoutNameByEpoch, nil +} + func blobStoragePath(c *cli.Context) string { blobsPath := c.Path(BlobStoragePathFlag.Name) if blobsPath == "" { diff --git a/cmd/beacon-chain/storage/options_test.go b/cmd/beacon-chain/storage/options_test.go index 7136a8911a..a8ecd23c56 100644 --- a/cmd/beacon-chain/storage/options_test.go +++ b/cmd/beacon-chain/storage/options_test.go @@ -3,8 +3,14 @@ package storage import ( "flag" "fmt" + "os" + "path" + "path/filepath" + "strings" + "syscall" "testing" + "github.com/OffchainLabs/prysm/v6/beacon-chain/db/filesystem" "github.com/OffchainLabs/prysm/v6/cmd" "github.com/OffchainLabs/prysm/v6/config/params" "github.com/OffchainLabs/prysm/v6/consensus-types/primitives" @@ -109,3 +115,105 @@ func TestDataColumnStoragePath_FlagSpecified(t *testing.T) { assert.Equal(t, "/blah/blah", storagePath) } + +type mockStringFlagGetter struct { + v string +} + +func (m mockStringFlagGetter) String(name string) string { + return m.v +} + +func TestDetectLayout(t *testing.T) { + fakeRoot := "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890" + require.Equal(t, true, filesystem.IsBlockRootDir(fakeRoot)) + withFlatRoot := func(t *testing.T, dir string) { + require.NoError(t, os.MkdirAll(path.Join(dir, fakeRoot), 0o755)) + } + withByEpoch := func(t *testing.T, dir string) { + require.NoError(t, os.MkdirAll(path.Join(dir, filesystem.PeriodicEpochBaseDir), 0o755)) + } + + cases := []struct { + name string + expected string + expectedErr error + setup func(t *testing.T, dir string) + getter mockStringFlagGetter + }{ + { + name: "no blobs dir", + expected: filesystem.LayoutNameByEpoch, + }, + { + name: "blobs dir without root dirs", + expected: filesystem.LayoutNameByEpoch, + // empty subdirectory under blobs which doesn't match the block root pattern + setup: func(t *testing.T, dir string) { + require.NoError(t, os.MkdirAll(path.Join(dir, "some-dir"), 0o755)) + }, + }, + { + name: "blobs dir with root dir", + setup: withFlatRoot, + expected: filesystem.LayoutNameFlat, + }, + { + name: "blobs dir with root dir overridden by flag", + setup: withFlatRoot, + expected: filesystem.LayoutNameByEpoch, + getter: mockStringFlagGetter{v: filesystem.LayoutNameByEpoch}, + }, + { + name: "only has by-epoch dir", + setup: withByEpoch, + expected: filesystem.LayoutNameByEpoch, + }, + { + name: "contains by-epoch dir and root dirs", + setup: func(t *testing.T, dir string) { + withFlatRoot(t, dir) + withByEpoch(t, dir) + }, + expected: filesystem.LayoutNameFlat, + }, + { + name: "unreadable dir", + // It isn't detectLayout's job to detect any errors reading the directory, + // so it ignores errors from the os.Open call. But we can also get errors + // from readdirnames, but this is hard to simulate in a test. So in the test + // write a file in place of the dir, which will succeed in the Open call, but + // fail when read as a directory. This is why the expected error is syscall.ENOTDIR + // (syscall error code from using readdirnames syscall on an ordinary file). + setup: func(t *testing.T, dir string) { + parent := filepath.Dir(dir) + require.NoError(t, os.MkdirAll(parent, 0o755)) + require.NoError(t, os.WriteFile(dir, []byte{}, 0o755)) + }, + expectedErr: syscall.ENOTDIR, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + dir := strings.Replace(t.Name(), " ", "_", -1) + dir = path.Join(os.TempDir(), dir) + if tc.setup != nil { + tc.setup(t, dir) + } + if tc.expectedErr != nil { + t.Log("hi") + } + layout, err := detectLayout(dir, tc.getter) + if tc.expectedErr != nil { + require.ErrorIs(t, err, tc.expectedErr) + return + } + require.NoError(t, err) + require.Equal(t, tc.expected, layout) + + assert.Equal(t, tc.expectedErr, err) + assert.Equal(t, tc.expected, layout) + }) + } +}