Merge branch 'develop' into deflake-evaluator

reducing retry delay as well as the blob transaction count for builder tests
2026-01-30 15:48:00 -05:00 · 2026-01-29 14:22:09 -08:00 · 2026-01-29 09:45:52 -08:00 · 2026-01-29 08:20:00 -06:00 · 2026-01-28 13:31:32 -08:00 · 2026-01-28 11:21:58 -06:00
7 changed files with 228 additions and 90 deletions
--- a/changelog/james-prysm_deflake-evaluator.md
+++ b/changelog/james-prysm_deflake-evaluator.md
@@ -0,0 +1,3 @@
+### Ignored
+
+- adding some short retries for some end to end evaluators in an attempt to deflake tests.
--- a/testing/endtoend/components/eth1/transactions.go
+++ b/testing/endtoend/components/eth1/transactions.go
@@ -40,6 +40,7 @@ type TransactionGenerator struct {
 	cancel        context.CancelFunc
 	paused        bool
 	useLargeBlobs bool // Use large blob transactions (6 blobs per tx) for BPO testing
+	blobTxCount   int  // Number of blob transactions per slot (0 means default of 5)
 }

 func (t *TransactionGenerator) UnderlyingProcess() *os.Process {
@@ -48,8 +49,8 @@ func (t *TransactionGenerator) UnderlyingProcess() *os.Process {
 	return &os.Process{}
 }

-func NewTransactionGenerator(keystore string, seed int64, useLargeBlobs bool) *TransactionGenerator {
-	return &TransactionGenerator{keystore: keystore, seed: seed, useLargeBlobs: useLargeBlobs}
+func NewTransactionGenerator(keystore string, seed int64, useLargeBlobs bool, blobTxCount int) *TransactionGenerator {
+	return &TransactionGenerator{keystore: keystore, seed: seed, useLargeBlobs: useLargeBlobs, blobTxCount: blobTxCount}
 }

 func (t *TransactionGenerator) Start(ctx context.Context) error {
@@ -114,7 +115,7 @@ func (t *TransactionGenerator) Start(ctx context.Context) error {
 				continue
 			}
 			backend := ethclient.NewClient(client)
-			err = SendTransaction(client, mineKey.PrivateKey, gasPrice, mineKey.Address.String(), txCount, backend, false, t.useLargeBlobs)
+			err = SendTransaction(client, mineKey.PrivateKey, gasPrice, mineKey.Address.String(), txCount, backend, false, t.useLargeBlobs, t.blobTxCount)
 			if err != nil {
 				return err
 			}
@@ -128,7 +129,7 @@ func (s *TransactionGenerator) Started() <-chan struct{} {
 	return s.started
 }

-func SendTransaction(client *rpc.Client, key *ecdsa.PrivateKey, gasPrice *big.Int, addr string, txCount uint64, backend *ethclient.Client, al bool, useLargeBlobs bool) error {
+func SendTransaction(client *rpc.Client, key *ecdsa.PrivateKey, gasPrice *big.Int, addr string, txCount uint64, backend *ethclient.Client, al bool, useLargeBlobs bool, blobTxCount int) error {
 	sender := common.HexToAddress(addr)
 	nonce, err := backend.PendingNonceAt(context.Background(), fundedAccount.Address)
 	if err != nil {
@@ -150,14 +151,19 @@ func SendTransaction(client *rpc.Client, key *ecdsa.PrivateKey, gasPrice *big.In
 	clock := startup.NewClock(e2e.TestParams.CLGenesisTime, [32]byte{})
 	isPostFulu := clock.CurrentEpoch() >= params.BeaconConfig().FuluForkEpoch

+	// Default to 5 blob transactions per slot if not configured.
+	numBlobTxs := blobTxCount
+	if numBlobTxs <= 0 {
+		numBlobTxs = 5
+	}
+
 	g, _ := errgroup.WithContext(context.Background())
-	txs := make([]*types.Transaction, 10)
+	txs := make([]*types.Transaction, numBlobTxs)

 	// Send blob transactions - use different versions pre/post Fulu
 	if isPostFulu {
 		logrus.Info("Sending blob transactions with cell proofs")
-		// Reduced from 10 to 5 to reduce load and prevent builder/EL timeouts
-		for index := range uint64(5) {
+		for index := range uint64(numBlobTxs) {

 			g.Go(func() error {
 				tx, err := RandomBlobCellTx(client, fundedAccount.Address, nonce+index, gasPrice, chainid, al, useLargeBlobs)
@@ -176,8 +182,7 @@ func SendTransaction(client *rpc.Client, key *ecdsa.PrivateKey, gasPrice *big.In
 		}
 	} else {
 		logrus.Info("Sending blob transactions with sidecars")
-		// Reduced from 10 to 5 to reduce load and prevent builder/EL timeouts
-		for index := range uint64(5) {
+		for index := range uint64(numBlobTxs) {

 			g.Go(func() error {
 				tx, err := RandomBlobTx(client, fundedAccount.Address, nonce+index, gasPrice, chainid, al, useLargeBlobs)
--- a/testing/endtoend/endtoend_test.go
+++ b/testing/endtoend/endtoend_test.go
@@ -225,9 +225,9 @@ func (r *testRunner) testDepositsAndTx(ctx context.Context, g *errgroup.Group,
 		if err := helpers.ComponentsStarted(ctx, []e2etypes.ComponentRunner{r.depositor}); err != nil {
 			return errors.Wrap(err, "testDepositsAndTx unable to run, depositor did not Start")
 		}
-        go func() {
-            if r.config.TestDeposits {
-                log.Info("Running deposit tests")
+		go func() {
+			if r.config.TestDeposits {
+				log.Info("Running deposit tests")
 				// The validators with an index < minGenesisActiveCount all have deposits already from the chain start.
 				// Skip all of those chain start validators by seeking to minGenesisActiveCount in the validator list
 				// for further deposit testing.
@@ -238,12 +238,12 @@ func (r *testRunner) testDepositsAndTx(ctx context.Context, g *errgroup.Group,
 						r.t.Error(errors.Wrap(err, "depositor.SendAndMine failed"))
 					}
 				}
-            }
-            // Only generate background transactions when relevant for the test.
-            if r.config.TestDeposits || r.config.TestFeature || r.config.UseBuilder {
-                r.testTxGeneration(ctx, g, keystorePath, []e2etypes.ComponentRunner{})
-            }
-        }()
+			}
+			// Only generate background transactions when relevant for the test.
+			if r.config.TestDeposits || r.config.TestFeature || r.config.UseBuilder {
+				r.testTxGeneration(ctx, g, keystorePath, []e2etypes.ComponentRunner{})
+			}
+		}()
 		if r.config.TestDeposits {
 			return depositCheckValidator.Start(ctx)
 		}
@@ -252,7 +252,7 @@ func (r *testRunner) testDepositsAndTx(ctx context.Context, g *errgroup.Group,
 }

 func (r *testRunner) testTxGeneration(ctx context.Context, g *errgroup.Group, keystorePath string, requiredNodes []e2etypes.ComponentRunner) {
-	txGenerator := eth1.NewTransactionGenerator(keystorePath, r.config.Seed, r.config.UseLargeBlobs)
+	txGenerator := eth1.NewTransactionGenerator(keystorePath, r.config.Seed, r.config.UseLargeBlobs, r.config.BlobTxCount)
 	r.comHandler.txGen = txGenerator
 	g.Go(func() error {
 		if err := helpers.ComponentsStarted(ctx, requiredNodes); err != nil {
--- a/testing/endtoend/evaluators/node.go
+++ b/testing/endtoend/evaluators/node.go
@@ -156,19 +156,9 @@ func waitForMidEpoch(conn *grpc.ClientConn) error {
 	}
 }

-func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
-	// Wait until we're at least halfway into the epoch to avoid race conditions
-	// at epoch boundaries where nodes may report different epochs.
-	if err := waitForMidEpoch(conns[0]); err != nil {
-		return errors.Wrap(err, "failed waiting for mid-epoch")
-	}
-
-	headEpochs := make([]primitives.Epoch, len(conns))
-	headBlockRoots := make([][]byte, len(conns))
-	justifiedRoots := make([][]byte, len(conns))
-	prevJustifiedRoots := make([][]byte, len(conns))
-	finalizedRoots := make([][]byte, len(conns))
-	chainHeads := make([]*eth.ChainHead, len(conns))
+// getHeadEpochs fetches the head epoch from all beacon nodes concurrently.
+func getHeadEpochs(conns []*grpc.ClientConn) ([]primitives.Epoch, error) {
+	epochs := make([]primitives.Epoch, len(conns))
 	g, _ := errgroup.WithContext(context.Background())

 	for i, conn := range conns {
@@ -180,63 +170,145 @@ func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientCo
 			if err != nil {
 				return errors.Wrapf(err, "connection number=%d", conIdx)
 			}
-			headEpochs[conIdx] = chainHead.HeadEpoch
-			headBlockRoots[conIdx] = chainHead.HeadBlockRoot
-			justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
-			prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
-			finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
-			chainHeads[conIdx] = chainHead
+			epochs[conIdx] = chainHead.HeadEpoch
 			return nil
 		})
 	}
 	if err := g.Wait(); err != nil {
-		return err
+		return nil, err
 	}
-
-	for i := range conns {
-		if headEpochs[0] != headEpochs[i] {
-			return fmt.Errorf(
-				"received conflicting head epochs on node %d, expected %d, received %d",
-				i,
-				headEpochs[0],
-				headEpochs[i],
-			)
-		}
-		if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting head block roots on node %d, expected %#x, received %#x",
-				i,
-				headBlockRoots[0],
-				headBlockRoots[i],
-			)
-		}
-		if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
-				i,
-				justifiedRoots[0],
-				justifiedRoots[i],
-				chainHeads[0].String(),
-				chainHeads[i].String(),
-			)
-		}
-		if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
-				i,
-				prevJustifiedRoots[0],
-				prevJustifiedRoots[i],
-			)
-		}
-		if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
-				i,
-				finalizedRoots[0],
-				finalizedRoots[i],
-			)
-		}
-	}
-
-	return nil
+	return epochs, nil
+}
+
+func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Wait until we're at least halfway into the epoch to avoid race conditions
+	// at epoch boundaries where nodes may report different epochs.
+	if err := waitForMidEpoch(conns[0]); err != nil {
+		return errors.Wrap(err, "failed waiting for mid-epoch")
+	}
+
+	// First, wait for all nodes to reach the same epoch. Sync nodes may be
+	// behind and need time to catch up. We poll every 2 seconds with a
+	// 60 second timeout - this adapts to actual sync progress rather than
+	// using fixed delays.
+	const epochTimeout = 60 * time.Second
+	const epochPollInterval = 2 * time.Second
+	epochDeadline := time.Now().Add(epochTimeout)
+
+	for time.Now().Before(epochDeadline) {
+		epochs, err := getHeadEpochs(conns)
+		if err != nil {
+			return err
+		}
+		allSame := true
+		for i := 1; i < len(epochs); i++ {
+			if epochs[0] != epochs[i] {
+				allSame = false
+				break
+			}
+		}
+		if allSame {
+			break
+		}
+		time.Sleep(epochPollInterval)
+	}
+
+	// Now that epochs match (or timeout reached), do detailed head comparison
+	// with a few retries to handle block propagation delays.
+	const maxRetries = 5
+	const retryDelay = 1 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+
+		headEpochs := make([]primitives.Epoch, len(conns))
+		headBlockRoots := make([][]byte, len(conns))
+		justifiedRoots := make([][]byte, len(conns))
+		prevJustifiedRoots := make([][]byte, len(conns))
+		finalizedRoots := make([][]byte, len(conns))
+		chainHeads := make([]*eth.ChainHead, len(conns))
+		g, _ := errgroup.WithContext(context.Background())
+
+		for i, conn := range conns {
+			conIdx := i
+			currConn := conn
+			g.Go(func() error {
+				beaconClient := eth.NewBeaconChainClient(currConn)
+				chainHead, err := beaconClient.GetChainHead(context.Background(), &emptypb.Empty{})
+				if err != nil {
+					return errors.Wrapf(err, "connection number=%d", conIdx)
+				}
+				headEpochs[conIdx] = chainHead.HeadEpoch
+				headBlockRoots[conIdx] = chainHead.HeadBlockRoot
+				justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
+				prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
+				finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
+				chainHeads[conIdx] = chainHead
+				return nil
+			})
+		}
+		if err := g.Wait(); err != nil {
+			return err
+		}
+
+		lastErr = nil
+		for i := range conns {
+			if headEpochs[0] != headEpochs[i] {
+				lastErr = fmt.Errorf(
+					"received conflicting head epochs on node %d, expected %d, received %d",
+					i,
+					headEpochs[0],
+					headEpochs[i],
+				)
+				break
+			}
+			if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting head block roots on node %d, expected %#x, received %#x",
+					i,
+					headBlockRoots[0],
+					headBlockRoots[i],
+				)
+				break
+			}
+			if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
+					i,
+					justifiedRoots[0],
+					justifiedRoots[i],
+					chainHeads[0].String(),
+					chainHeads[i].String(),
+				)
+				break
+			}
+			if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
+					i,
+					prevJustifiedRoots[0],
+					prevJustifiedRoots[i],
+				)
+				break
+			}
+			if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
+					i,
+					finalizedRoots[0],
+					finalizedRoots[i],
+				)
+				break
+			}
+		}
+
+		if lastErr == nil {
+			return nil
+		}
+	}
+
+	return lastErr
 }
--- a/testing/endtoend/evaluators/validator.go
+++ b/testing/endtoend/evaluators/validator.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"time"

 	"github.com/OffchainLabs/prysm/v7/api/server/structs"
 	"github.com/OffchainLabs/prysm/v7/beacon-chain/core/altair"
@@ -123,6 +124,25 @@ func validatorsAreActive(ec *types.EvaluationContext, conns ...*grpc.ClientConn)

 // validatorsParticipating ensures the validators have an acceptable participation rate.
 func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Retry up to 3 times with 2 second delays to handle timing flakes where
+	// attestations haven't been fully processed yet due to block propagation delays.
+	const maxRetries = 3
+	const retryDelay = 2 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+		lastErr = checkValidatorsParticipating(conns)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return lastErr
+}
+
+func checkValidatorsParticipating(conns []*grpc.ClientConn) error {
 	conn := conns[0]
 	client := ethpb.NewBeaconChainClient(conn)
 	validatorRequest := &ethpb.GetValidatorParticipationRequest{}
@@ -234,6 +254,25 @@ func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientCo
 // validatorsSyncParticipation ensures the validators have an acceptable participation rate for
 // sync committee assignments.
 func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Retry up to 3 times with 2 second delays to handle timing flakes where
+	// sync committee messages haven't fully propagated yet.
+	const maxRetries = 3
+	const retryDelay = 2 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+		lastErr = checkSyncParticipation(conns)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return lastErr
+}
+
+func checkSyncParticipation(conns []*grpc.ClientConn) error {
 	conn := conns[0]
 	client := ethpb.NewNodeClient(conn)
 	altairClient := ethpb.NewBeaconChainClient(conn)
@@ -272,9 +311,9 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 			// Skip fork slot.
 			continue
 		}
-		// Skip slots 1-2 at genesis - validators need time to ramp up after chain start
+		// Skip early slots at genesis - validators need time to ramp up after chain start
 		// due to doppelganger protection. This is a startup timing issue, not a fork transition issue.
-		if b.Block().Slot() < 3 {
+		if b.Block().Slot() < 5 {
 			continue
 		}
 		expectedParticipation := expectedSyncParticipation
@@ -289,6 +328,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 		if err != nil {
 			return err
 		}
+		// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
+		// where the proposer didn't receive sync committee contributions in time.
+		if syncAgg.SyncCommitteeBits.Count() == 0 {
+			continue
+		}
 		threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedParticipation)
 		if syncAgg.SyncCommitteeBits.Count() < threshold {
 			return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())
@@ -343,6 +387,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 		if err != nil {
 			return err
 		}
+		// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
+		// where the proposer didn't receive sync committee contributions in time.
+		if syncAgg.SyncCommitteeBits.Count() == 0 {
+			continue
+		}
 		threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedSyncParticipation)
 		if syncAgg.SyncCommitteeBits.Count() < threshold {
 			return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())
--- a/testing/endtoend/minimal_builder_e2e_test.go
+++ b/testing/endtoend/minimal_builder_e2e_test.go
@@ -9,11 +9,11 @@ import (
 )

 func TestEndToEnd_MinimalConfig_WithBuilder(t *testing.T) {
-	r := e2eMinimal(t, types.InitForkCfg(version.Bellatrix, version.Electra, params.E2ETestConfig()), types.WithCheckpointSync(), types.WithBuilder())
+	r := e2eMinimal(t, types.InitForkCfg(version.Bellatrix, version.Electra, params.E2ETestConfig()), types.WithCheckpointSync(), types.WithBuilder(), types.WithBlobTxCount(2))
 	r.run()
 }

 func TestEndToEnd_MinimalConfig_WithBuilder_ValidatorRESTApi(t *testing.T) {
-	r := e2eMinimal(t, types.InitForkCfg(version.Bellatrix, version.Electra, params.E2ETestConfig()), types.WithCheckpointSync(), types.WithBuilder(), types.WithValidatorRESTApi())
+	r := e2eMinimal(t, types.InitForkCfg(version.Bellatrix, version.Electra, params.E2ETestConfig()), types.WithCheckpointSync(), types.WithBuilder(), types.WithValidatorRESTApi(), types.WithBlobTxCount(2))
 	r.run()
 }
--- a/testing/endtoend/types/types.go
+++ b/testing/endtoend/types/types.go
@@ -68,6 +68,14 @@ func WithLargeBlobs() E2EConfigOpt {
 	}
 }

+// WithBlobTxCount sets the number of blob transactions sent per slot.
+// Default is 5 when not specified.
+func WithBlobTxCount(n int) E2EConfigOpt {
+	return func(cfg *E2EConfig) {
+		cfg.BlobTxCount = n
+	}
+}
+
 func WithSSZOnly() E2EConfigOpt {
 	return func(cfg *E2EConfig) {
 		if err := os.Setenv(params.EnvNameOverrideAccept, api.OctetStreamMediaType); err != nil {
@@ -108,6 +116,7 @@ type E2EConfig struct {
 	UseBeaconRestApi        bool
 	UseBuilder              bool
 	UseLargeBlobs           bool // Use large blob transactions (6 blobs per tx) for BPO testing
+	BlobTxCount             int  // Number of blob transactions per slot (0 means default of 5)
 	EpochsToRun             uint64
 	ExitEpoch               primitives.Epoch // Custom epoch for voluntary exit submission (0 means use default)
 	Seed                    int64
Author	SHA1	Message	Date
james-prysm	7450c117e2	Merge branch 'develop' into deflake-evaluator	2026-01-29 14:22:09 -08:00
james-prysm	80b60d06d5	Merge branch 'develop' into deflake-evaluator	2026-01-29 09:45:52 -08:00
james-prysm	e694571474	reducing retry delay as well as the blob transaction count for builder tests	2026-01-29 08:20:00 -06:00
james-prysm	ca1f386cff	Merge branch 'develop' into deflake-evaluator	2026-01-28 13:31:32 -08:00
james-prysm	6bbc9de081	adding skip for blocks with empty sync committee bits count	2026-01-28 11:21:58 -06:00
james-prysm	e8da68bb0f	add an epoch poll	2026-01-28 10:17:26 -06:00
james-prysm	7e33e96605	updating retry count and buffer for sync committee skip	2026-01-27 20:09:42 -06:00
james-prysm	2c4a9bc4ac	Merge branch 'develop' into deflake-evaluator	2026-01-27 14:21:02 -08:00
james-prysm	4c32b6a89e	changelog	2026-01-27 16:20:09 -06:00
james-prysm	c69ffbec62	attempt	2026-01-26 16:16:03 -06:00