Compare commits

...

7 Commits

Author SHA1 Message Date
james-prysm
6bbc9de081 adding skip for blocks with empty sync committee bits count 2026-01-28 11:21:58 -06:00
james-prysm
e8da68bb0f add an epoch poll 2026-01-28 10:17:26 -06:00
james-prysm
7e33e96605 updating retry count and buffer for sync committee skip 2026-01-27 20:09:42 -06:00
james-prysm
2c4a9bc4ac Merge branch 'develop' into deflake-evaluator 2026-01-27 14:21:02 -08:00
james-prysm
4c32b6a89e changelog 2026-01-27 16:20:09 -06:00
Victor Farazdagi
1c65c8866a fix: bazel build failure on macOS (hashtree patch) (#16281)
**What type of PR is this?**


Bug fix

**What does this PR do? Why is it needed?**
  
It appears that #16216 introduced hashtree integration but broke builds
on macOS Intel (darwin_amd64).

```                                                                                                                                                                                                                                                                          
  Error:                                                                                                                                                                                                                                                                  
  Undefined symbols for architecture x86_64:                                                                                                                                                                                                                              
    "_github.com/OffchainLabs/hashtree.HashtreeHash"                                                                                                                                                                                                                      
```

The Bazel patch for hashtree was missing `wrapper_darwin_amd64.s`. So,
`//go:noescape` in `bindings.go` assumes that symbols are available
elsewhere, and while on other platforms optimized version is used, on
Darwin we have stub (symbol still must be available), which needs to be
referenced -- hence, this PR.

**Other notes for review**

I've re-checked using `bazel clean && bazel build //cmd/beacon-chain` --
it was failing before, works now.

cc @potuz as original patch author

**Acknowledgements**

- [x] I have read
[CONTRIBUTING.md](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md).
- [x] I have included a uniquely named [changelog fragment
file](https://github.com/prysmaticlabs/prysm/blob/develop/CONTRIBUTING.md#maintaining-changelogmd).
- [x] I have added a description with sufficient context for reviewers
to understand this PR.
- [x] I have tested that my changes work as expected and I added a
testing plan to the PR description (if applicable).

Co-authored-by: Potuz <potuz@prysmaticlabs.com>
2026-01-27 16:40:29 +00:00
james-prysm
c69ffbec62 attempt 2026-01-26 16:16:03 -06:00
5 changed files with 198 additions and 70 deletions

View File

@@ -0,0 +1,3 @@
### Fixed
- Fix Bazel build failure on macOS x86_64 (darwin_amd64) (adds missing assembly stub to hashtree patch).

View File

@@ -0,0 +1,3 @@
### Ignored
- adding some short retries for some end to end evaluators in an attempt to deflake tests.

View File

@@ -156,19 +156,9 @@ func waitForMidEpoch(conn *grpc.ClientConn) error {
}
}
func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
// Wait until we're at least halfway into the epoch to avoid race conditions
// at epoch boundaries where nodes may report different epochs.
if err := waitForMidEpoch(conns[0]); err != nil {
return errors.Wrap(err, "failed waiting for mid-epoch")
}
headEpochs := make([]primitives.Epoch, len(conns))
headBlockRoots := make([][]byte, len(conns))
justifiedRoots := make([][]byte, len(conns))
prevJustifiedRoots := make([][]byte, len(conns))
finalizedRoots := make([][]byte, len(conns))
chainHeads := make([]*eth.ChainHead, len(conns))
// getHeadEpochs fetches the head epoch from all beacon nodes concurrently.
func getHeadEpochs(conns []*grpc.ClientConn) ([]primitives.Epoch, error) {
epochs := make([]primitives.Epoch, len(conns))
g, _ := errgroup.WithContext(context.Background())
for i, conn := range conns {
@@ -180,63 +170,145 @@ func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientCo
if err != nil {
return errors.Wrapf(err, "connection number=%d", conIdx)
}
headEpochs[conIdx] = chainHead.HeadEpoch
headBlockRoots[conIdx] = chainHead.HeadBlockRoot
justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
chainHeads[conIdx] = chainHead
epochs[conIdx] = chainHead.HeadEpoch
return nil
})
}
if err := g.Wait(); err != nil {
return err
return nil, err
}
for i := range conns {
if headEpochs[0] != headEpochs[i] {
return fmt.Errorf(
"received conflicting head epochs on node %d, expected %d, received %d",
i,
headEpochs[0],
headEpochs[i],
)
}
if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
return fmt.Errorf(
"received conflicting head block roots on node %d, expected %#x, received %#x",
i,
headBlockRoots[0],
headBlockRoots[i],
)
}
if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
return fmt.Errorf(
"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
i,
justifiedRoots[0],
justifiedRoots[i],
chainHeads[0].String(),
chainHeads[i].String(),
)
}
if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
return fmt.Errorf(
"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
i,
prevJustifiedRoots[0],
prevJustifiedRoots[i],
)
}
if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
return fmt.Errorf(
"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
i,
finalizedRoots[0],
finalizedRoots[i],
)
}
}
return nil
return epochs, nil
}
func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
// Wait until we're at least halfway into the epoch to avoid race conditions
// at epoch boundaries where nodes may report different epochs.
if err := waitForMidEpoch(conns[0]); err != nil {
return errors.Wrap(err, "failed waiting for mid-epoch")
}
// First, wait for all nodes to reach the same epoch. Sync nodes may be
// behind and need time to catch up. We poll every 2 seconds with a
// 60 second timeout - this adapts to actual sync progress rather than
// using fixed delays.
const epochTimeout = 60 * time.Second
const epochPollInterval = 2 * time.Second
epochDeadline := time.Now().Add(epochTimeout)
for time.Now().Before(epochDeadline) {
epochs, err := getHeadEpochs(conns)
if err != nil {
return err
}
allSame := true
for i := 1; i < len(epochs); i++ {
if epochs[0] != epochs[i] {
allSame = false
break
}
}
if allSame {
break
}
time.Sleep(epochPollInterval)
}
// Now that epochs match (or timeout reached), do detailed head comparison
// with a few retries to handle block propagation delays.
const maxRetries = 5
const retryDelay = 3 * time.Second
var lastErr error
for attempt := range maxRetries {
if attempt > 0 {
time.Sleep(retryDelay)
}
headEpochs := make([]primitives.Epoch, len(conns))
headBlockRoots := make([][]byte, len(conns))
justifiedRoots := make([][]byte, len(conns))
prevJustifiedRoots := make([][]byte, len(conns))
finalizedRoots := make([][]byte, len(conns))
chainHeads := make([]*eth.ChainHead, len(conns))
g, _ := errgroup.WithContext(context.Background())
for i, conn := range conns {
conIdx := i
currConn := conn
g.Go(func() error {
beaconClient := eth.NewBeaconChainClient(currConn)
chainHead, err := beaconClient.GetChainHead(context.Background(), &emptypb.Empty{})
if err != nil {
return errors.Wrapf(err, "connection number=%d", conIdx)
}
headEpochs[conIdx] = chainHead.HeadEpoch
headBlockRoots[conIdx] = chainHead.HeadBlockRoot
justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
chainHeads[conIdx] = chainHead
return nil
})
}
if err := g.Wait(); err != nil {
return err
}
lastErr = nil
for i := range conns {
if headEpochs[0] != headEpochs[i] {
lastErr = fmt.Errorf(
"received conflicting head epochs on node %d, expected %d, received %d",
i,
headEpochs[0],
headEpochs[i],
)
break
}
if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
lastErr = fmt.Errorf(
"received conflicting head block roots on node %d, expected %#x, received %#x",
i,
headBlockRoots[0],
headBlockRoots[i],
)
break
}
if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
lastErr = fmt.Errorf(
"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
i,
justifiedRoots[0],
justifiedRoots[i],
chainHeads[0].String(),
chainHeads[i].String(),
)
break
}
if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
lastErr = fmt.Errorf(
"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
i,
prevJustifiedRoots[0],
prevJustifiedRoots[i],
)
break
}
if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
lastErr = fmt.Errorf(
"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
i,
finalizedRoots[0],
finalizedRoots[i],
)
break
}
}
if lastErr == nil {
return nil
}
}
return lastErr
}

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"net/http"
"strconv"
"time"
"github.com/OffchainLabs/prysm/v7/api/server/structs"
"github.com/OffchainLabs/prysm/v7/beacon-chain/core/altair"
@@ -123,6 +124,25 @@ func validatorsAreActive(ec *types.EvaluationContext, conns ...*grpc.ClientConn)
// validatorsParticipating ensures the validators have an acceptable participation rate.
func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
// Retry up to 3 times with 2 second delays to handle timing flakes where
// attestations haven't been fully processed yet due to block propagation delays.
const maxRetries = 3
const retryDelay = 2 * time.Second
var lastErr error
for attempt := range maxRetries {
if attempt > 0 {
time.Sleep(retryDelay)
}
lastErr = checkValidatorsParticipating(conns)
if lastErr == nil {
return nil
}
}
return lastErr
}
func checkValidatorsParticipating(conns []*grpc.ClientConn) error {
conn := conns[0]
client := ethpb.NewBeaconChainClient(conn)
validatorRequest := &ethpb.GetValidatorParticipationRequest{}
@@ -234,6 +254,25 @@ func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientCo
// validatorsSyncParticipation ensures the validators have an acceptable participation rate for
// sync committee assignments.
func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
// Retry up to 3 times with 2 second delays to handle timing flakes where
// sync committee messages haven't fully propagated yet.
const maxRetries = 3
const retryDelay = 2 * time.Second
var lastErr error
for attempt := range maxRetries {
if attempt > 0 {
time.Sleep(retryDelay)
}
lastErr = checkSyncParticipation(conns)
if lastErr == nil {
return nil
}
}
return lastErr
}
func checkSyncParticipation(conns []*grpc.ClientConn) error {
conn := conns[0]
client := ethpb.NewNodeClient(conn)
altairClient := ethpb.NewBeaconChainClient(conn)
@@ -272,9 +311,9 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
// Skip fork slot.
continue
}
// Skip slots 1-2 at genesis - validators need time to ramp up after chain start
// Skip early slots at genesis - validators need time to ramp up after chain start
// due to doppelganger protection. This is a startup timing issue, not a fork transition issue.
if b.Block().Slot() < 3 {
if b.Block().Slot() < 5 {
continue
}
expectedParticipation := expectedSyncParticipation
@@ -289,6 +328,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
if err != nil {
return err
}
// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
// where the proposer didn't receive sync committee contributions in time.
if syncAgg.SyncCommitteeBits.Count() == 0 {
continue
}
threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedParticipation)
if syncAgg.SyncCommitteeBits.Count() < threshold {
return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())
@@ -343,6 +387,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
if err != nil {
return err
}
// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
// where the proposer didn't receive sync committee contributions in time.
if syncAgg.SyncCommitteeBits.Count() == 0 {
continue
}
threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedSyncParticipation)
if syncAgg.SyncCommitteeBits.Count() < threshold {
return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())

View File

@@ -1,7 +1,7 @@
diff -urN a/BUILD.bazel b/BUILD.bazel
--- a/BUILD.bazel 1969-12-31 18:00:00.000000000 -0600
+++ b/BUILD.bazel 2025-01-05 12:00:00.000000000 -0600
@@ -0,0 +1,89 @@
@@ -0,0 +1,90 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
@@ -32,6 +32,7 @@ diff -urN a/BUILD.bazel b/BUILD.bazel
+ ],
+ "@io_bazel_rules_go//go/platform:darwin_amd64": [
+ "bindings_darwin_amd64.go",
+ "wrapper_darwin_amd64.s",
+ ],
+ "//conditions:default": [],
+ }),