Updates resyncIfBehind() functionality (#7039)

* defines BestNonFinalized
* updates blocks_fetcher
* updates blocks_queue
* Merge branch 'master' into fix-resync
* Nishant's suggestion
* Merge branch 'fix-resync' of github.com:prysmaticlabs/prysm into fix-resync
* unit test
* Merge branch 'master' into fix-resync
* fixes test
* Merge refs/heads/master into fix-resync
Author: Victor Farazdagi, committed by GitHub, 2020-08-18 05:10:45 +03:00
Commit: 0a5ec502b0 (parent 4d463c4a85)
9 changed files with 153 additions and 26 deletions
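In short: the commit adds a BestNonFinalized helper to the peer status service, returning the highest head epoch that is ahead of ours and is voted for by at least minPeers connected peers. The initial-sync block fetcher and block queue use it instead of BestFinalized whenever they run in non-finalized (head) sync mode, and the service-level waitForMinimumPeers and resyncIfBehind checks switch to it outright, so resync decisions follow peers' head slots rather than only their finalized checkpoints. A stand-alone sketch of the new selection logic follows the first file's diff below.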

View File

@@ -480,6 +480,51 @@ func (p *Status) BestFinalized(maxPeers int, ourFinalizedEpoch uint64) (uint64,
return targetEpoch, potentialPIDs
}
+// BestNonFinalized returns the highest known epoch, which is higher than ours, and is shared
+// by at least minPeers.
+func (p *Status) BestNonFinalized(minPeers int, ourFinalizedEpoch uint64) (uint64, []peer.ID) {
+connected := p.Connected()
+epochVotes := make(map[uint64]uint64)
+pidEpoch := make(map[peer.ID]uint64, len(connected))
+pidHead := make(map[peer.ID]uint64, len(connected))
+potentialPIDs := make([]peer.ID, 0, len(connected))
+ourFinalizedSlot := helpers.StartSlot(ourFinalizedEpoch)
+for _, pid := range connected {
+peerChainState, err := p.ChainState(pid)
+if err == nil && peerChainState != nil && peerChainState.HeadSlot > ourFinalizedSlot {
+epoch := helpers.SlotToEpoch(peerChainState.HeadSlot)
+epochVotes[epoch]++
+pidEpoch[pid] = epoch
+pidHead[pid] = peerChainState.HeadSlot
+potentialPIDs = append(potentialPIDs, pid)
+}
+}
+// Select the target epoch, which has enough peers' votes (>= minPeers).
+var targetEpoch uint64
+for epoch, votes := range epochVotes {
+if votes >= uint64(minPeers) && targetEpoch < epoch {
+targetEpoch = epoch
+}
+}
+// Sort PIDs by head slot, in decreasing order.
+sort.Slice(potentialPIDs, func(i, j int) bool {
+return pidHead[potentialPIDs[i]] > pidHead[potentialPIDs[j]]
+})
+// Trim potential peers to those on or after target epoch.
+for i, pid := range potentialPIDs {
+if pidEpoch[pid] < targetEpoch {
+potentialPIDs = potentialPIDs[:i]
+break
+}
+}
+return targetEpoch, potentialPIDs
+}
// fetch is a helper function that fetches a peer status, possibly creating it.
func (p *Status) fetch(pid peer.ID) *peerData {
if _, ok := p.store.peers[pid]; !ok {
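As a reading aid (not part of this commit), the selection that BestNonFinalized performs can be restated on plain values. The sketch below assumes 32 slots per epoch and uses strings in place of peer.IDs; slotToEpoch and bestNonFinalized are local stand-ins, not Prysm functions.

package main

import (
	"fmt"
	"sort"
)

const slotsPerEpoch = 32 // assumption: mainnet SlotsPerEpoch

func slotToEpoch(slot uint64) uint64 { return slot / slotsPerEpoch }

// bestNonFinalized mirrors the vote/sort/trim steps above on a map of peer -> head slot.
func bestNonFinalized(heads map[string]uint64, minPeers int, ourFinalizedEpoch uint64) (uint64, []string) {
	ourFinalizedSlot := ourFinalizedEpoch * slotsPerEpoch
	epochVotes := make(map[uint64]uint64)
	pidEpoch := make(map[string]uint64)
	potentialPIDs := make([]string, 0, len(heads))
	// Count one vote per peer for the epoch its head slot falls into,
	// ignoring peers that are not ahead of our finalized slot.
	for pid, headSlot := range heads {
		if headSlot > ourFinalizedSlot {
			epoch := slotToEpoch(headSlot)
			epochVotes[epoch]++
			pidEpoch[pid] = epoch
			potentialPIDs = append(potentialPIDs, pid)
		}
	}
	// Target the highest epoch that gathered at least minPeers votes.
	var targetEpoch uint64
	for epoch, votes := range epochVotes {
		if votes >= uint64(minPeers) && targetEpoch < epoch {
			targetEpoch = epoch
		}
	}
	// Sort peers by head slot (descending), then drop peers still below the target epoch.
	sort.Slice(potentialPIDs, func(i, j int) bool {
		return heads[potentialPIDs[i]] > heads[potentialPIDs[j]]
	})
	for i, pid := range potentialPIDs {
		if pidEpoch[pid] < targetEpoch {
			potentialPIDs = potentialPIDs[:i]
			break
		}
	}
	return targetEpoch, potentialPIDs
}

func main() {
	heads := map[string]uint64{"a": 32, "b": 233, "c": 235, "d": 258, "e": 268, "f": 270}
	epoch, pids := bestNonFinalized(heads, 3, 5)
	fmt.Println(epoch, pids) // 8 [f e d]: epoch 8 has three votes; peers still on epoch 7 are trimmed.
}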

View File

@@ -612,6 +612,31 @@ func TestBestFinalized_returnsMaxValue(t *testing.T) {
assert.Equal(t, maxPeers, len(pids), "Wrong number of peers returned")
}
+func TestStatus_BestNonFinalized(t *testing.T) {
+p := peers.NewStatus(context.Background(), &peers.StatusConfig{
+PeerLimit: 30,
+ScorerParams: &peers.PeerScorerConfig{
+BadResponsesScorerConfig: &peers.BadResponsesScorerConfig{
+Threshold: 2,
+},
+},
+})
+peerSlots := []uint64{32, 32, 32, 32, 235, 233, 258, 268, 270}
+for i, headSlot := range peerSlots {
+p.Add(new(enr.Record), peer.ID(i), nil, network.DirOutbound)
+p.SetConnectionState(peer.ID(i), peers.PeerConnected)
+p.SetChainState(peer.ID(i), &pb.Status{
+HeadSlot: headSlot,
+})
+}
+expectedEpoch := uint64(8)
+retEpoch, pids := p.BestNonFinalized(3, 5)
+assert.Equal(t, expectedEpoch, retEpoch, "Incorrect Finalized epoch retrieved")
+assert.Equal(t, 3, len(pids), "Unexpected number of peers")
+}
func TestStatus_CurrentEpoch(t *testing.T) {
maxBadResponses := 2
p := peers.NewStatus(context.Background(), &peers.StatusConfig{
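For reference, the expected values here follow from the slot arithmetic, assuming the default 32 slots per epoch: with ourFinalizedEpoch = 5, only peers whose head slot is above slot 160 are counted, which excludes the four peers at slot 32; head slots 233 and 235 fall in epoch 7 (two votes), while 258, 268 and 270 fall in epoch 8 (three votes), so with minPeers = 3 the target epoch is 8, and after sorting by head slot and trimming peers below epoch 8 exactly three peers remain.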

View File

@@ -249,8 +249,15 @@ func (f *blocksFetcher) handleRequest(ctx context.Context, start, count uint64)
return response
}
-headEpoch := f.finalizationFetcher.FinalizedCheckpt().Epoch
-finalizedEpoch, peers := f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
+var targetEpoch uint64
+var peers []peer.ID
+if f.mode == modeStopOnFinalizedEpoch {
+headEpoch := f.finalizationFetcher.FinalizedCheckpt().Epoch
+targetEpoch, peers = f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
+} else {
+headEpoch := helpers.SlotToEpoch(f.headFetcher.HeadSlot())
+targetEpoch, peers = f.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, headEpoch)
+}
if len(peers) == 0 {
response.err = errNoPeersAvailable
return response
@@ -258,7 +265,7 @@ func (f *blocksFetcher) handleRequest(ctx context.Context, start, count uint64)
// Short circuit start far exceeding the highest finalized epoch in some infinite loop.
if f.mode == modeStopOnFinalizedEpoch {
-highestFinalizedSlot := helpers.StartSlot(finalizedEpoch + 1)
+highestFinalizedSlot := helpers.StartSlot(targetEpoch + 1)
if start > highestFinalizedSlot {
response.err = fmt.Errorf("%v, slot: %d, highest finalized slot: %d",
errSlotIsTooHigh, start, highestFinalizedSlot)

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/libp2p/go-libp2p-core/peer"
"github.com/prysmaticlabs/prysm/beacon-chain/core/helpers"
+"github.com/prysmaticlabs/prysm/beacon-chain/flags"
scorers "github.com/prysmaticlabs/prysm/beacon-chain/p2p/peers"
"github.com/prysmaticlabs/prysm/shared/mathutil"
@@ -71,7 +72,14 @@ func (f *blocksFetcher) waitForMinimumPeers(ctx context.Context) ([]peer.ID, err
if ctx.Err() != nil {
return nil, ctx.Err()
}
-_, peers := f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, f.finalizationFetcher.FinalizedCheckpt().Epoch)
+var peers []peer.ID
+if f.mode == modeStopOnFinalizedEpoch {
+headEpoch := f.finalizationFetcher.FinalizedCheckpt().Epoch
+_, peers = f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
+} else {
+headEpoch := helpers.SlotToEpoch(f.headFetcher.HeadSlot())
+_, peers = f.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, headEpoch)
+}
if len(peers) >= required {
return peers, nil
}

View File

@@ -479,8 +479,9 @@ func TestBlocksFetcher_requestBeaconBlocksByRange(t *testing.T) {
fetcher := newBlocksFetcher(
ctx,
&blocksFetcherConfig{
-headFetcher: mc,
-p2p: p2p,
+finalizationFetcher: mc,
+headFetcher: mc,
+p2p: p2p,
})
_, peerIDs := p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, helpers.SlotToEpoch(mc.HeadSlot()))
@@ -620,8 +621,9 @@ func TestBlocksFetcher_nonSkippedSlotAfter(t *testing.T) {
fetcher := newBlocksFetcher(
ctx,
&blocksFetcherConfig{
-headFetcher: mc,
-p2p: p2p,
+finalizationFetcher: mc,
+headFetcher: mc,
+p2p: p2p,
},
)
fetcher.rateLimiter = leakybucket.NewCollector(6400, 6400, false)
@@ -882,6 +884,7 @@ func TestBlocksFetcher_filterScoredPeers(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
mc, p2p, _ := initializeTestServices(t, []uint64{}, []*peerData{})
fetcher := newBlocksFetcher(context.Background(), &blocksFetcherConfig{
+finalizationFetcher: mc,
headFetcher: mc,
p2p: p2p,
peerFilterCapacityWeight: tt.args.capacityWeight,

View File

@@ -6,6 +6,7 @@ import (
"github.com/libp2p/go-libp2p-core/peer"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/beacon-chain/core/helpers"
+"github.com/prysmaticlabs/prysm/beacon-chain/flags"
p2ppb "github.com/prysmaticlabs/prysm/proto/beacon/p2p/v1"
"github.com/prysmaticlabs/prysm/shared/featureconfig"
"github.com/prysmaticlabs/prysm/shared/params"
@@ -22,15 +23,22 @@ func (f *blocksFetcher) nonSkippedSlotAfter(ctx context.Context, slot uint64) (u
ctx, span := trace.StartSpan(ctx, "initialsync.nonSkippedSlotAfter")
defer span.End()
-headEpoch := helpers.SlotToEpoch(f.headFetcher.HeadSlot())
-finalizedEpoch, peers := f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
+var targetEpoch, headEpoch uint64
+var peers []peer.ID
+if f.mode == modeStopOnFinalizedEpoch {
+headEpoch = f.finalizationFetcher.FinalizedCheckpt().Epoch
+targetEpoch, peers = f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, headEpoch)
+} else {
+headEpoch = helpers.SlotToEpoch(f.headFetcher.HeadSlot())
+targetEpoch, peers = f.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, headEpoch)
+}
log.WithFields(logrus.Fields{
-"start": slot,
-"headEpoch": headEpoch,
-"finalizedEpoch": finalizedEpoch,
+"start": slot,
+"headEpoch": headEpoch,
+"targetEpoch": targetEpoch,
}).Debug("Searching for non-skipped slot")
// Exit early, if no peers with high enough finalized epoch are found.
-if finalizedEpoch <= headEpoch {
+if targetEpoch <= headEpoch {
return 0, errSlotIsTooHigh
}
var err error
@@ -86,7 +94,7 @@ func (f *blocksFetcher) nonSkippedSlotAfter(ctx context.Context, slot uint64) (u
// Quickly find the close enough epoch where a non-empty slot definitely exists.
// Only single random slot per epoch is checked - allowing to move forward relatively quickly.
slot = slot + nonSkippedSlotsFullSearchEpochs*slotsPerEpoch
-upperBoundSlot := helpers.StartSlot(finalizedEpoch + 1)
+upperBoundSlot := helpers.StartSlot(targetEpoch + 1)
for ind := slot + 1; ind < upperBoundSlot; ind += (slotsPerEpoch * slotsPerEpoch) / 2 {
start := ind + uint64(f.rand.Intn(int(slotsPerEpoch)))
nextSlot, err := fetch(peers[pidInd%len(peers)], start, slotsPerEpoch/2, slotsPerEpoch)
@@ -109,7 +117,7 @@ func (f *blocksFetcher) nonSkippedSlotAfter(ctx context.Context, slot uint64) (u
if err != nil {
return 0, err
}
-if nextSlot < slot || helpers.StartSlot(finalizedEpoch+1) < nextSlot {
+if nextSlot < slot || helpers.StartSlot(targetEpoch+1) < nextSlot {
return 0, errors.New("invalid range for non-skipped slot")
}
return nextSlot, nil
@@ -120,3 +128,10 @@ func (f *blocksFetcher) bestFinalizedSlot() uint64 {
finalizedEpoch, _ := f.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, f.finalizationFetcher.FinalizedCheckpt().Epoch)
return helpers.StartSlot(finalizedEpoch)
}
+// bestNonFinalizedSlot returns the highest non-finalized slot of the majority of connected peers.
+func (f *blocksFetcher) bestNonFinalizedSlot() uint64 {
+headEpoch := helpers.SlotToEpoch(f.headFetcher.HeadSlot())
+targetEpoch, _ := f.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, headEpoch)
+return helpers.StartSlot(targetEpoch)
+}
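The two queue-facing helpers above differ only in which epoch they convert back to a slot. A stand-alone sketch of that slot/epoch arithmetic, assuming the mainnet value of 32 slots per epoch; slotToEpoch and startSlot here are local stand-ins for helpers.SlotToEpoch and helpers.StartSlot:

package main

import "fmt"

const slotsPerEpoch = 32 // assumption: mainnet SlotsPerEpoch

func slotToEpoch(slot uint64) uint64 { return slot / slotsPerEpoch } // stand-in for helpers.SlotToEpoch
func startSlot(epoch uint64) uint64  { return epoch * slotsPerEpoch } // stand-in for helpers.StartSlot

func main() {
	headSlot := uint64(270)            // our current head slot
	headEpoch := slotToEpoch(headSlot) // 8: the epoch bestNonFinalizedSlot feeds into BestNonFinalized
	targetEpoch := headEpoch + 1       // pretend the peer majority votes one epoch ahead of us
	fmt.Println(headEpoch, startSlot(targetEpoch)) // 8 288: the slot ceiling the queue would sync towards
}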

View File

@@ -94,7 +94,11 @@ func newBlocksQueue(ctx context.Context, cfg *blocksQueueConfig) *blocksQueue {
}
highestExpectedSlot := cfg.highestExpectedSlot
if highestExpectedSlot <= cfg.startSlot {
-highestExpectedSlot = blocksFetcher.bestFinalizedSlot()
+if cfg.mode == modeStopOnFinalizedEpoch {
+highestExpectedSlot = blocksFetcher.bestFinalizedSlot()
+} else {
+highestExpectedSlot = blocksFetcher.bestNonFinalizedSlot()
+}
}
// Override fetcher's sync mode.
@@ -169,9 +173,16 @@ func (q *blocksQueue) loop() {
// Check highest expected slot when we approach chain's head slot.
if q.headFetcher.HeadSlot() >= q.highestExpectedSlot {
// By the time initial sync is complete, highest slot may increase, re-check.
-if q.highestExpectedSlot < q.blocksFetcher.bestFinalizedSlot() {
-q.highestExpectedSlot = q.blocksFetcher.bestFinalizedSlot()
-continue
+if q.mode == modeStopOnFinalizedEpoch {
+if q.highestExpectedSlot < q.blocksFetcher.bestFinalizedSlot() {
+q.highestExpectedSlot = q.blocksFetcher.bestFinalizedSlot()
+continue
+}
+} else {
+if q.highestExpectedSlot < q.blocksFetcher.bestNonFinalizedSlot() {
+q.highestExpectedSlot = q.blocksFetcher.bestNonFinalizedSlot()
+continue
+}
}
log.WithField("slot", q.highestExpectedSlot).Debug("Highest expected slot reached")
q.cancel()
@@ -366,8 +377,14 @@ func (q *blocksQueue) onProcessSkippedEvent(ctx context.Context) eventHandlerFn
}
// Check if we have enough peers to progress, or sync needs to halt (due to no peers available).
-if q.blocksFetcher.bestFinalizedSlot() <= q.headFetcher.HeadSlot() {
-return stateSkipped, errNoPeersWithFinalizedBlocks
+if q.mode == modeStopOnFinalizedEpoch {
+if q.blocksFetcher.bestFinalizedSlot() <= q.headFetcher.HeadSlot() {
+return stateSkipped, errNoPeersWithFinalizedBlocks
+}
+} else {
+if q.blocksFetcher.bestNonFinalizedSlot() <= q.headFetcher.HeadSlot() {
+return stateSkipped, errNoPeersWithFinalizedBlocks
+}
}
// Shift start position of all the machines except for the last one.
@@ -385,8 +402,14 @@ func (q *blocksQueue) onProcessSkippedEvent(ctx context.Context) eventHandlerFn
if err != nil {
return stateSkipped, err
}
-if q.highestExpectedSlot < q.blocksFetcher.bestFinalizedSlot() {
-q.highestExpectedSlot = q.blocksFetcher.bestFinalizedSlot()
+if q.mode == modeStopOnFinalizedEpoch {
+if q.highestExpectedSlot < q.blocksFetcher.bestFinalizedSlot() {
+q.highestExpectedSlot = q.blocksFetcher.bestFinalizedSlot()
+}
+} else {
+if q.highestExpectedSlot < q.blocksFetcher.bestNonFinalizedSlot() {
+q.highestExpectedSlot = q.blocksFetcher.bestNonFinalizedSlot()
+}
}
if nonSkippedSlot > q.highestExpectedSlot {
nonSkippedSlot = startSlot + blocksPerRequest*(lookaheadSteps-1)

View File

@@ -214,7 +214,7 @@ func (s *Service) waitForMinimumPeers() {
required = flags.Get().MinimumSyncPeers
}
for {
-_, peers := s.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, s.chain.FinalizedCheckpt().Epoch)
+_, peers := s.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, s.chain.FinalizedCheckpt().Epoch)
if len(peers) >= required {
break
}

View File

@@ -12,6 +12,7 @@ import (
"github.com/libp2p/go-libp2p-core/peer"
"github.com/pkg/errors"
"github.com/prysmaticlabs/prysm/beacon-chain/core/helpers"
+"github.com/prysmaticlabs/prysm/beacon-chain/flags"
"github.com/prysmaticlabs/prysm/beacon-chain/p2p"
"github.com/prysmaticlabs/prysm/beacon-chain/p2p/peers"
pb "github.com/prysmaticlabs/prysm/proto/beacon/p2p/v1"
@@ -72,7 +73,7 @@ func (s *Service) resyncIfBehind() {
runutil.RunEvery(s.ctx, interval, func() {
if s.shouldReSync() {
syncedEpoch := helpers.SlotToEpoch(s.chain.HeadSlot())
-highestEpoch, _ := s.p2p.Peers().BestFinalized(params.BeaconConfig().MaxPeersToSync, syncedEpoch)
+highestEpoch, _ := s.p2p.Peers().BestNonFinalized(flags.Get().MinimumSyncPeers, syncedEpoch)
if helpers.StartSlot(highestEpoch) > s.chain.HeadSlot() {
log.WithFields(logrus.Fields{
"currentEpoch": helpers.SlotToEpoch(s.chain.CurrentSlot()),