Mirror of https://github.com/OffchainLabs/prysm.git (synced 2026-01-28 14:48:13 -05:00)

Compare commits: debug-stat...deflake-ev (7 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 6bbc9de081 | |
| | e8da68bb0f | |
| | 7e33e96605 | |
| | 2c4a9bc4ac | |
| | 4c32b6a89e | |
| | 1c65c8866a | |
| | c69ffbec62 | |
changelog/farazdagi_fix-hashtree-darwin-amd64.md (new file, 3 lines)
@@ -0,0 +1,3 @@
+### Fixed
+
+- Fix Bazel build failure on macOS x86_64 (darwin_amd64) (adds missing assembly stub to hashtree patch).
changelog/james-prysm_deflake-evaluator.md (new file, 3 lines)
@@ -0,0 +1,3 @@
+### Ignored
+
+- Add short retries to some end-to-end evaluators in an attempt to deflake the tests.
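The evaluator changes below all use the same retry shape: a fixed number of attempts with a fixed delay, returning the last error if every attempt fails. A minimal, self-contained sketch of that pattern in isolation; the `retry` helper name and its parameters are illustrative only and are not part of this diff:

```go
package main

import (
	"fmt"
	"time"
)

// retry runs check up to maxAttempts times, sleeping delay between attempts,
// and returns the last error if every attempt fails.
func retry(maxAttempts int, delay time.Duration, check func() error) error {
	var lastErr error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if attempt > 0 {
			time.Sleep(delay)
		}
		if lastErr = check(); lastErr == nil {
			return nil
		}
	}
	return lastErr
}

func main() {
	calls := 0
	err := retry(3, 2*time.Second, func() error {
		calls++
		if calls < 3 {
			return fmt.Errorf("not ready yet (call %d)", calls)
		}
		return nil
	})
	fmt.Println(calls, err) // 3 <nil>
}
```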
@@ -156,19 +156,9 @@ func waitForMidEpoch(conn *grpc.ClientConn) error {
 	}
 }
 
-func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
-	// Wait until we're at least halfway into the epoch to avoid race conditions
-	// at epoch boundaries where nodes may report different epochs.
-	if err := waitForMidEpoch(conns[0]); err != nil {
-		return errors.Wrap(err, "failed waiting for mid-epoch")
-	}
-
-	headEpochs := make([]primitives.Epoch, len(conns))
-	headBlockRoots := make([][]byte, len(conns))
-	justifiedRoots := make([][]byte, len(conns))
-	prevJustifiedRoots := make([][]byte, len(conns))
-	finalizedRoots := make([][]byte, len(conns))
-	chainHeads := make([]*eth.ChainHead, len(conns))
+// getHeadEpochs fetches the head epoch from all beacon nodes concurrently.
+func getHeadEpochs(conns []*grpc.ClientConn) ([]primitives.Epoch, error) {
+	epochs := make([]primitives.Epoch, len(conns))
 	g, _ := errgroup.WithContext(context.Background())
 
 	for i, conn := range conns {
@@ -180,63 +170,145 @@ func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientCo
 			if err != nil {
 				return errors.Wrapf(err, "connection number=%d", conIdx)
 			}
-			headEpochs[conIdx] = chainHead.HeadEpoch
-			headBlockRoots[conIdx] = chainHead.HeadBlockRoot
-			justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
-			prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
-			finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
-			chainHeads[conIdx] = chainHead
+			epochs[conIdx] = chainHead.HeadEpoch
 			return nil
 		})
 	}
 	if err := g.Wait(); err != nil {
-		return err
+		return nil, err
 	}
 
-	for i := range conns {
-		if headEpochs[0] != headEpochs[i] {
-			return fmt.Errorf(
-				"received conflicting head epochs on node %d, expected %d, received %d",
-				i,
-				headEpochs[0],
-				headEpochs[i],
-			)
-		}
-		if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting head block roots on node %d, expected %#x, received %#x",
-				i,
-				headBlockRoots[0],
-				headBlockRoots[i],
-			)
-		}
-		if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
-				i,
-				justifiedRoots[0],
-				justifiedRoots[i],
-				chainHeads[0].String(),
-				chainHeads[i].String(),
-			)
-		}
-		if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
-				i,
-				prevJustifiedRoots[0],
-				prevJustifiedRoots[i],
-			)
-		}
-		if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
-			return fmt.Errorf(
-				"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
-				i,
-				finalizedRoots[0],
-				finalizedRoots[i],
-			)
-		}
-	}
-
-	return nil
+	return epochs, nil
 }
+
+func allNodesHaveSameHead(_ *e2etypes.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Wait until we're at least halfway into the epoch to avoid race conditions
+	// at epoch boundaries where nodes may report different epochs.
+	if err := waitForMidEpoch(conns[0]); err != nil {
+		return errors.Wrap(err, "failed waiting for mid-epoch")
+	}
+
+	// First, wait for all nodes to reach the same epoch. Sync nodes may be
+	// behind and need time to catch up. We poll every 2 seconds with a
+	// 60 second timeout - this adapts to actual sync progress rather than
+	// using fixed delays.
+	const epochTimeout = 60 * time.Second
+	const epochPollInterval = 2 * time.Second
+	epochDeadline := time.Now().Add(epochTimeout)
+
+	for time.Now().Before(epochDeadline) {
+		epochs, err := getHeadEpochs(conns)
+		if err != nil {
+			return err
+		}
+		allSame := true
+		for i := 1; i < len(epochs); i++ {
+			if epochs[0] != epochs[i] {
+				allSame = false
+				break
+			}
+		}
+		if allSame {
+			break
+		}
+		time.Sleep(epochPollInterval)
+	}
+
+	// Now that epochs match (or timeout reached), do detailed head comparison
+	// with a few retries to handle block propagation delays.
+	const maxRetries = 5
+	const retryDelay = 3 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+
+		headEpochs := make([]primitives.Epoch, len(conns))
+		headBlockRoots := make([][]byte, len(conns))
+		justifiedRoots := make([][]byte, len(conns))
+		prevJustifiedRoots := make([][]byte, len(conns))
+		finalizedRoots := make([][]byte, len(conns))
+		chainHeads := make([]*eth.ChainHead, len(conns))
+		g, _ := errgroup.WithContext(context.Background())
+
+		for i, conn := range conns {
+			conIdx := i
+			currConn := conn
+			g.Go(func() error {
+				beaconClient := eth.NewBeaconChainClient(currConn)
+				chainHead, err := beaconClient.GetChainHead(context.Background(), &emptypb.Empty{})
+				if err != nil {
+					return errors.Wrapf(err, "connection number=%d", conIdx)
+				}
+				headEpochs[conIdx] = chainHead.HeadEpoch
+				headBlockRoots[conIdx] = chainHead.HeadBlockRoot
+				justifiedRoots[conIdx] = chainHead.JustifiedBlockRoot
+				prevJustifiedRoots[conIdx] = chainHead.PreviousJustifiedBlockRoot
+				finalizedRoots[conIdx] = chainHead.FinalizedBlockRoot
+				chainHeads[conIdx] = chainHead
+				return nil
+			})
+		}
+		if err := g.Wait(); err != nil {
+			return err
+		}
+
+		lastErr = nil
+		for i := range conns {
+			if headEpochs[0] != headEpochs[i] {
+				lastErr = fmt.Errorf(
+					"received conflicting head epochs on node %d, expected %d, received %d",
+					i,
+					headEpochs[0],
+					headEpochs[i],
+				)
+				break
+			}
+			if !bytes.Equal(headBlockRoots[0], headBlockRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting head block roots on node %d, expected %#x, received %#x",
+					i,
+					headBlockRoots[0],
+					headBlockRoots[i],
+				)
+				break
+			}
+			if !bytes.Equal(justifiedRoots[0], justifiedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting justified block roots on node %d, expected %#x, received %#x: %s and %s",
+					i,
+					justifiedRoots[0],
+					justifiedRoots[i],
+					chainHeads[0].String(),
+					chainHeads[i].String(),
+				)
+				break
+			}
+			if !bytes.Equal(prevJustifiedRoots[0], prevJustifiedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting previous justified block roots on node %d, expected %#x, received %#x",
+					i,
+					prevJustifiedRoots[0],
+					prevJustifiedRoots[i],
+				)
+				break
+			}
+			if !bytes.Equal(finalizedRoots[0], finalizedRoots[i]) {
+				lastErr = fmt.Errorf(
+					"received conflicting finalized epoch roots on node %d, expected %#x, received %#x",
+					i,
+					finalizedRoots[0],
+					finalizedRoots[i],
+				)
+				break
+			}
+		}
+
+		if lastErr == nil {
+			return nil
+		}
+	}
+
+	return lastErr
+}
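The new allNodesHaveSameHead leans on waitForMidEpoch, whose body is not part of this diff. As a rough idea of what such a helper has to do, block until the chain is at least halfway through the current epoch, here is a hypothetical, self-contained sketch; the genesis-time handling, slot duration, and slots-per-epoch values are assumptions, not the actual Prysm helper:

```go
package main

import (
	"fmt"
	"time"
)

const (
	secondsPerSlot = 12 // assumption: mainnet slot duration
	slotsPerEpoch  = 32 // assumption: mainnet epoch length
)

// waitUntilMidEpoch blocks until the current slot is at least halfway into
// its epoch, so that all nodes should agree on the epoch number. In the real
// evaluator the genesis time would come from the beacon node; here it is
// passed in directly.
func waitUntilMidEpoch(genesis time.Time) {
	for {
		slot := int(time.Since(genesis).Seconds()) / secondsPerSlot
		slotInEpoch := slot % slotsPerEpoch
		if slotInEpoch >= slotsPerEpoch/2 {
			return
		}
		// Sleep until the mid-epoch slot starts, then re-check.
		slotsToWait := slotsPerEpoch/2 - slotInEpoch
		time.Sleep(time.Duration(slotsToWait*secondsPerSlot) * time.Second)
	}
}

func main() {
	// Pretend the chain started 250 seconds ago: slot 20 of epoch 0,
	// already past the mid-epoch slot, so this returns immediately.
	genesis := time.Now().Add(-250 * time.Second)
	waitUntilMidEpoch(genesis)
	fmt.Println("at or past the mid-epoch slot")
}
```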
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"net/http"
 	"strconv"
+	"time"
 
 	"github.com/OffchainLabs/prysm/v7/api/server/structs"
 	"github.com/OffchainLabs/prysm/v7/beacon-chain/core/altair"
@@ -123,6 +124,25 @@ func validatorsAreActive(ec *types.EvaluationContext, conns ...*grpc.ClientConn)
 
 // validatorsParticipating ensures the validators have an acceptable participation rate.
 func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Retry up to 3 times with 2 second delays to handle timing flakes where
+	// attestations haven't been fully processed yet due to block propagation delays.
+	const maxRetries = 3
+	const retryDelay = 2 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+		lastErr = checkValidatorsParticipating(conns)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return lastErr
+}
+
+func checkValidatorsParticipating(conns []*grpc.ClientConn) error {
 	conn := conns[0]
 	client := ethpb.NewBeaconChainClient(conn)
 	validatorRequest := &ethpb.GetValidatorParticipationRequest{}
@@ -234,6 +254,25 @@ func validatorsParticipating(_ *types.EvaluationContext, conns ...*grpc.ClientCo
 // validatorsSyncParticipation ensures the validators have an acceptable participation rate for
 // sync committee assignments.
 func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.ClientConn) error {
+	// Retry up to 3 times with 2 second delays to handle timing flakes where
+	// sync committee messages haven't fully propagated yet.
+	const maxRetries = 3
+	const retryDelay = 2 * time.Second
+	var lastErr error
+
+	for attempt := range maxRetries {
+		if attempt > 0 {
+			time.Sleep(retryDelay)
+		}
+		lastErr = checkSyncParticipation(conns)
+		if lastErr == nil {
+			return nil
+		}
+	}
+	return lastErr
+}
+
+func checkSyncParticipation(conns []*grpc.ClientConn) error {
 	conn := conns[0]
 	client := ethpb.NewNodeClient(conn)
 	altairClient := ethpb.NewBeaconChainClient(conn)
@@ -272,9 +311,9 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 			// Skip fork slot.
 			continue
 		}
-		// Skip slots 1-2 at genesis - validators need time to ramp up after chain start
+		// Skip early slots at genesis - validators need time to ramp up after chain start
 		// due to doppelganger protection. This is a startup timing issue, not a fork transition issue.
-		if b.Block().Slot() < 3 {
+		if b.Block().Slot() < 5 {
 			continue
 		}
 		expectedParticipation := expectedSyncParticipation
@@ -289,6 +328,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 		if err != nil {
 			return err
 		}
+		// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
+		// where the proposer didn't receive sync committee contributions in time.
+		if syncAgg.SyncCommitteeBits.Count() == 0 {
+			continue
+		}
 		threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedParticipation)
 		if syncAgg.SyncCommitteeBits.Count() < threshold {
 			return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())
@@ -343,6 +387,11 @@ func validatorsSyncParticipation(_ *types.EvaluationContext, conns ...*grpc.Clie
 		if err != nil {
 			return err
 		}
+		// Skip blocks with zero sync bits - these are typically empty/anomalous blocks
+		// where the proposer didn't receive sync committee contributions in time.
+		if syncAgg.SyncCommitteeBits.Count() == 0 {
+			continue
+		}
 		threshold := uint64(float64(syncAgg.SyncCommitteeBits.Len()) * expectedSyncParticipation)
 		if syncAgg.SyncCommitteeBits.Count() < threshold {
 			return errors.Errorf("In block of slot %d ,the aggregate bitvector with length of %d only got a count of %d", b.Block().Slot(), threshold, syncAgg.SyncCommitteeBits.Count())
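The sync-participation check above compares the set bit count of the aggregate against a threshold derived from the bitvector length. A small, self-contained sketch of that arithmetic; the committee size and expected participation rate are example values, not taken from this diff:

```go
package main

import "fmt"

func main() {
	// Example values: a 512-member sync committee and an expected
	// participation rate of 0.85 (both illustrative).
	const committeeSize = 512
	const expectedParticipation = 0.85

	// Same shape as the evaluator: float multiply, then truncate to uint64.
	threshold := uint64(float64(committeeSize) * expectedParticipation)
	fmt.Println(threshold) // 435 (512 * 0.85 = 435.2, truncated)

	// A block whose aggregate has fewer set bits than the threshold fails the
	// evaluator; a zero-bit aggregate is now skipped entirely by the new check.
	for _, count := range []uint64{0, 400, 450} {
		switch {
		case count == 0:
			fmt.Println(count, "-> skipped")
		case count < threshold:
			fmt.Println(count, "-> evaluator error")
		default:
			fmt.Println(count, "-> ok")
		}
	}
}
```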
@@ -1,7 +1,7 @@
 diff -urN a/BUILD.bazel b/BUILD.bazel
 --- a/BUILD.bazel	1969-12-31 18:00:00.000000000 -0600
 +++ b/BUILD.bazel	2025-01-05 12:00:00.000000000 -0600
-@@ -0,0 +1,89 @@
+@@ -0,0 +1,90 @@
 +load("@io_bazel_rules_go//go:def.bzl", "go_library")
 +
 +go_library(
@@ -32,6 +32,7 @@ diff -urN a/BUILD.bazel b/BUILD.bazel
 +        ],
 +        "@io_bazel_rules_go//go/platform:darwin_amd64": [
 +            "bindings_darwin_amd64.go",
++            "wrapper_darwin_amd64.s",
 +        ],
 +        "//conditions:default": [],
 +    }),
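The patch change above adds wrapper_darwin_amd64.s next to bindings_darwin_amd64.go so the darwin_amd64 select() branch has the assembly stub it links against. Outside Bazel, the same platform gating is done with Go's filename and build-tag conventions. A generic, hypothetical two-file sketch of how a Go file declares a routine whose body lives in a companion .s file; the package, function, and file names are illustrative, not the actual hashtree bindings:

```go
// hashstub_darwin_amd64.go - built only on macOS/amd64 because of the
// filename suffix (the Bazel select() branch plays the same role).
// The function has no Go body; the linker expects the TEXT symbol from
// the companion .s file sketched in the trailing comment.

//go:build darwin && amd64

package hashstub

//go:noescape
func sum64(a, b uint64) uint64

// --- hashstub_darwin_amd64.s (companion file) ---
// Without a TEXT symbol for sum64, the darwin_amd64 link step fails with a
// missing-symbol error - the kind of gap the changelog entry describes.
//
//   #include "textflag.h"
//
//   TEXT ·sum64(SB), NOSPLIT, $0-24
//       MOVQ a+0(FP), AX
//       ADDQ b+8(FP), AX
//       MOVQ AX, ret+16(FP)
//       RET
```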