Enable Backfill in E2E (#13524)

* enable backfill for devmode * enable backfill * gaz * move to its own package * fix panic * fix bug * gaz * kasey's review
2026-01-08 21:08:10 -05:00 · 2024-01-26 12:37:41 +08:00
parent 9c13d47f4c
commit 22b6d1751d
15 changed files with 96 additions and 43 deletions
--- a/beacon-chain/sync/backfill/BUILD.bazel
+++ b/beacon-chain/sync/backfill/BUILD.bazel
@@ -54,6 +54,7 @@ go_test(
    ],
    embed = [":go_default_library"],
    deps = [
+        "//beacon-chain/core/helpers:go_default_library",
        "//beacon-chain/core/signing:go_default_library",
        "//beacon-chain/db:go_default_library",
        "//beacon-chain/p2p/testing:go_default_library",
--- a/beacon-chain/sync/backfill/service.go
+++ b/beacon-chain/sync/backfill/service.go
@@ -227,6 +227,10 @@ func (s *Service) Start() {
 	}
 	s.ms.setClock(clock)

+	if s.store.isGenesisSync() {
+		log.Info("Exiting backfill service as the node has been initialized with a genesis state or the backfill status is missing")
+		return
+	}
 	status := s.store.status()
 	// Exit early if there aren't going to be any batches to backfill.
 	if primitives.Slot(status.LowSlot) <= s.ms.minimumSlot() {
@@ -293,8 +297,10 @@ func minimumBackfillSlot(current primitives.Slot) primitives.Slot {
 		oe = slots.MaxSafeEpoch()
 	}
 	offset := slots.UnsafeEpochStart(oe)
-	if offset > current {
-		return 0
+	if offset >= current {
+		// Slot 0 is the genesis block, therefore the signature in it is invalid.
+		// To prevent us from rejecting a batch, we restrict the minimum backfill batch till only slot 1
+		return 1
 	}
 	return current - offset
 }
--- a/beacon-chain/sync/backfill/service_test.go
+++ b/beacon-chain/sync/backfill/service_test.go
@@ -5,9 +5,11 @@ import (
 	"testing"
 	"time"

+	"github.com/prysmaticlabs/prysm/v4/beacon-chain/core/helpers"
 	p2ptest "github.com/prysmaticlabs/prysm/v4/beacon-chain/p2p/testing"
 	"github.com/prysmaticlabs/prysm/v4/beacon-chain/startup"
 	"github.com/prysmaticlabs/prysm/v4/beacon-chain/state"
+	"github.com/prysmaticlabs/prysm/v4/config/params"
 	"github.com/prysmaticlabs/prysm/v4/consensus-types/primitives"
 	"github.com/prysmaticlabs/prysm/v4/proto/dbval"
 	"github.com/prysmaticlabs/prysm/v4/testing/require"
@@ -75,6 +77,20 @@ func TestServiceInit(t *testing.T) {
 	}
 }

+func TestMinimumBackfillSlot(t *testing.T) {
+	oe := helpers.MinEpochsForBlockRequests()
+
+	currSlot := (oe + 100).Mul(uint64(params.BeaconConfig().SlotsPerEpoch))
+	minSlot := minimumBackfillSlot(primitives.Slot(currSlot))
+	require.Equal(t, 100*params.BeaconConfig().SlotsPerEpoch, minSlot)
+
+	oe = helpers.MinEpochsForBlockRequests()
+
+	currSlot = oe.Mul(uint64(params.BeaconConfig().SlotsPerEpoch))
+	minSlot = minimumBackfillSlot(primitives.Slot(currSlot))
+	require.Equal(t, primitives.Slot(1), minSlot)
+}
+
 func testReadN(t *testing.T, ctx context.Context, c chan batch, n int, into []batch) []batch {
 	for i := 0; i < n; i++ {
 		select {
--- a/beacon-chain/sync/backfill/status.go
+++ b/beacon-chain/sync/backfill/status.go
@@ -149,6 +149,12 @@ func (s *Store) swapStatus(bs *dbval.BackfillStatus) {
 	s.bs = bs
 }

+func (s *Store) isGenesisSync() bool {
+	s.RLock()
+	defer s.RUnlock()
+	return s.genesisSync
+}
+
 // originState looks up the state for the checkpoint sync origin. This is a hack, because StatusUpdater is the only
 // thing that needs db access and it has the origin root handy, so it's convenient to look it up here. The state is
 // needed by the verifier.
--- a/beacon-chain/sync/backfill/worker.go
+++ b/beacon-chain/sync/backfill/worker.go
@@ -40,11 +40,13 @@ func (w *p2pWorker) handle(ctx context.Context, b batch) batch {
 	dlt := time.Now()
 	backfillBatchTimeDownloading.Observe(float64(dlt.Sub(start).Milliseconds()))
 	if err != nil {
+		log.WithError(err).WithFields(b.logFields()).Debug("Batch requesting failed")
 		return b.withRetryableError(err)
 	}
 	vb, err := w.v.verify(results)
 	backfillBatchTimeVerifying.Observe(float64(time.Since(dlt).Milliseconds()))
 	if err != nil {
+		log.WithError(err).WithFields(b.logFields()).Debug("Batch validation failed")
 		return b.withRetryableError(err)
 	}
 	// This is a hack to get the rough size of the batch. This helps us approximate the amount of memory needed
--- a/cmd/beacon-chain/BUILD.bazel
+++ b/cmd/beacon-chain/BUILD.bazel
@@ -22,6 +22,7 @@ go_library(
        "//cmd/beacon-chain/jwt:go_default_library",
        "//cmd/beacon-chain/storage:go_default_library",
        "//cmd/beacon-chain/sync/backfill:go_default_library",
+        "//cmd/beacon-chain/sync/backfill/flags:go_default_library",
        "//cmd/beacon-chain/sync/checkpoint:go_default_library",
        "//cmd/beacon-chain/sync/genesis:go_default_library",
        "//config/features:go_default_library",
--- a/cmd/beacon-chain/main.go
+++ b/cmd/beacon-chain/main.go
@@ -20,7 +20,8 @@ import (
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/flags"
 	jwtcommands "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/jwt"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/storage"
-	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill"
+	backfill "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill"
+	bflags "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill/flags"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/checkpoint"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/genesis"
 	"github.com/prysmaticlabs/prysm/v4/config/features"
@@ -139,9 +140,9 @@ var appFlags = []cli.Flag{
 	flags.JwtId,
 	storage.BlobStoragePathFlag,
 	storage.BlobRetentionEpochFlag,
-	backfill.EnableExperimentalBackfill,
-	backfill.BackfillBatchSize,
-	backfill.BackfillWorkerCount,
+	bflags.EnableExperimentalBackfill,
+	bflags.BackfillBatchSize,
+	bflags.BackfillWorkerCount,
 }

 func init() {
--- a/cmd/beacon-chain/sync/backfill/BUILD.bazel
+++ b/cmd/beacon-chain/sync/backfill/BUILD.bazel
@@ -8,6 +8,7 @@ go_library(
    deps = [
        "//beacon-chain/node:go_default_library",
        "//beacon-chain/sync/backfill:go_default_library",
+        "//cmd/beacon-chain/sync/backfill/flags:go_default_library",
        "@com_github_urfave_cli_v2//:go_default_library",
    ],
 )
--- a/cmd/beacon-chain/sync/backfill/flags/BUILD.bazel
+++ b/cmd/beacon-chain/sync/backfill/flags/BUILD.bazel
@@ -0,0 +1,9 @@
+load("@prysm//tools/go:def.bzl", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["flags.go"],
+    importpath = "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill/flags",
+    visibility = ["//visibility:public"],
+    deps = ["@com_github_urfave_cli_v2//:go_default_library"],
+)
--- a/cmd/beacon-chain/sync/backfill/flags/flags.go
+++ b/cmd/beacon-chain/sync/backfill/flags/flags.go
@@ -0,0 +1,38 @@
+package flags
+
+import (
+	"github.com/urfave/cli/v2"
+)
+
+var (
+	backfillBatchSizeName   = "backfill-batch-size"
+	backfillWorkerCountName = "backfill-worker-count"
+
+	// EnableExperimentalBackfill enables backfill for checkpoint synced nodes.
+	// This flag will be removed onced backfill is enabled by default.
+	EnableExperimentalBackfill = &cli.BoolFlag{
+		Name: "enable-experimental-backfill",
+		Usage: "Backfill is still experimental at this time." +
+			"It will only be enabled if this flag is specified and the node was started using checkpoint sync.",
+	}
+	// BackfillBatchSize allows users to tune block backfill request sizes to maximize network utilization
+	// at the cost of higher memory.
+	BackfillBatchSize = &cli.Uint64Flag{
+		Name: backfillBatchSizeName,
+		Usage: "Number of blocks per backfill batch. " +
+			"A larger number will request more blocks at once from peers, but also consume more system memory to " +
+			"hold batches in memory during processing. This has a multiplicative effect with " + backfillWorkerCountName,
+		Value: 64,
+	}
+	// BackfillWorkerCount allows users to tune the number of concurrent backfill batches to download, to maximize
+	// network utilization at the cost of higher memory.
+	BackfillWorkerCount = &cli.IntFlag{
+		Name: backfillWorkerCountName,
+		Usage: "Number of concurrent backfill batch requests. " +
+			"A larger number will better utilize network resources, up to a system-dependent limit, but will also " +
+			"consume more system memory to hold batches in memory during processing. Multiply by backfill-batch-size and " +
+			"average block size (~2MB before deneb) to find the right number for your system. " +
+			"This has a multiplicatice effect with " + backfillBatchSizeName,
+		Value: 2,
+	}
+)
--- a/cmd/beacon-chain/sync/backfill/options.go
+++ b/cmd/beacon-chain/sync/backfill/options.go
@@ -3,49 +3,18 @@ package backfill
 import (
 	"github.com/prysmaticlabs/prysm/v4/beacon-chain/node"
 	"github.com/prysmaticlabs/prysm/v4/beacon-chain/sync/backfill"
+	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill/flags"
 	"github.com/urfave/cli/v2"
 )

-var (
-	backfillBatchSizeName   = "backfill-batch-size"
-	backfillWorkerCountName = "backfill-worker-count"
-	// EnableExperimentalBackfill enables backfill for checkpoint synced nodes.
-	// This flag will be removed onced backfill is enabled by default.
-	EnableExperimentalBackfill = &cli.BoolFlag{
-		Name: "enable-experimental-backfill",
-		Usage: "Backfill is still experimental at this time." +
-			"It will only be enabled if this flag is specified and the node was started using checkpoint sync.",
-	}
-	// BackfillBatchSize allows users to tune block backfill request sizes to maximize network utilization
-	// at the cost of higher memory.
-	BackfillBatchSize = &cli.Uint64Flag{
-		Name: backfillBatchSizeName,
-		Usage: "Number of blocks per backfill batch. " +
-			"A larger number will request more blocks at once from peers, but also consume more system memory to " +
-			"hold batches in memory during processing. This has a multiplicative effect with " + backfillWorkerCountName,
-		Value: 64,
-	}
-	// BackfillWorkerCount allows users to tune the number of concurrent backfill batches to download, to maximize
-	// network utilization at the cost of higher memory.
-	BackfillWorkerCount = &cli.IntFlag{
-		Name: backfillWorkerCountName,
-		Usage: "Number of concurrent backfill batch requests. " +
-			"A larger number will better utilize network resources, up to a system-dependent limit, but will also " +
-			"consume more system memory to hold batches in memory during processing. Multiply by backfill-batch-size and " +
-			"average block size (~2MB before deneb) to find the right number for your system. " +
-			"This has a multiplicatice effect with " + backfillBatchSizeName,
-		Value: 2,
-	}
-)
-
 // BeaconNodeOptions sets the appropriate functional opts on the *node.BeaconNode value, to decouple options
 // from flag parsing.
 func BeaconNodeOptions(c *cli.Context) ([]node.Option, error) {
 	opt := func(node *node.BeaconNode) (err error) {
 		node.BackfillOpts = []backfill.ServiceOption{
-			backfill.WithBatchSize(c.Uint64(BackfillBatchSize.Name)),
-			backfill.WithWorkerCount(c.Int(BackfillWorkerCount.Name)),
-			backfill.WithEnableBackfill(c.Bool(EnableExperimentalBackfill.Name)),
+			backfill.WithBatchSize(c.Uint64(flags.BackfillBatchSize.Name)),
+			backfill.WithWorkerCount(c.Int(flags.BackfillWorkerCount.Name)),
+			backfill.WithEnableBackfill(c.Bool(flags.EnableExperimentalBackfill.Name)),
 		}
 		return nil
 	}
--- a/cmd/beacon-chain/usage.go
+++ b/cmd/beacon-chain/usage.go
@@ -8,7 +8,7 @@ import (
 	"github.com/prysmaticlabs/prysm/v4/cmd"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/flags"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/storage"
-	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill"
+	backfill "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill/flags"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/checkpoint"
 	"github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/genesis"
 	"github.com/prysmaticlabs/prysm/v4/config/features"
--- a/config/features/BUILD.bazel
+++ b/config/features/BUILD.bazel
@@ -12,6 +12,7 @@ go_library(
    visibility = ["//visibility:public"],
    deps = [
        "//cmd:go_default_library",
+        "//cmd/beacon-chain/sync/backfill/flags:go_default_library",
        "//config/params:go_default_library",
        "@com_github_sirupsen_logrus//:go_default_library",
        "@com_github_urfave_cli_v2//:go_default_library",
--- a/config/features/flags.go
+++ b/config/features/flags.go
@@ -3,6 +3,7 @@ package features
 import (
 	"time"

+	backfill "github.com/prysmaticlabs/prysm/v4/cmd/beacon-chain/sync/backfill/flags"
 	"github.com/urfave/cli/v2"
 )

@@ -161,6 +162,7 @@ var devModeFlags = []cli.Flag{
 	enableVerboseSigVerification,
 	EnableEIP4881,
 	enableExperimentalState,
+	backfill.EnableExperimentalBackfill,
 }

 // ValidatorFlags contains a list of all the feature flags that apply to the validator client.
--- a/testing/endtoend/components/beacon_node.go
+++ b/testing/endtoend/components/beacon_node.go
@@ -283,7 +283,7 @@ func (node *BeaconNode) Start(ctx context.Context) error {
 	// on our features or the beacon index is a multiplier of 2 (idea is to split nodes
 	// equally down the line with one group having feature flags and the other without
 	// feature flags; this is to allow A-B testing on new features)
-	if !config.TestFeature || index%2 == 0 {
+	if !config.TestFeature || index != 1 {
 		args = append(args, features.E2EBeaconChainFlags...)
 	}
 	if config.UseBuilder {