Add diagnostic logging for 'invalid data returned from peer' errors (#15674)

When peers return invalid data during initial sync, log the specific validation failure reason. This helps identify:

- Whether the peer exceeded the requested block count
- Whether the peer exceeded the MAX_REQUEST_BLOCKS protocol limit
- Whether blocks are outside the requested slot range
- Whether blocks are out of order (slot not increasing, or slot difference not a multiple of the requested step)

Each log includes the specific condition that failed, making it easier to debug whether the issue is with peer implementations or request validation logic.
2026-01-09 15:37:56 -05:00 · 2025-09-09 17:08:37 -05:00
parent 029b896c79
commit f690af81fa
3 changed files with 61 additions and 3 deletions
--- a/beacon-chain/sync/initial-sync/blocks_fetcher.go
+++ b/beacon-chain/sync/initial-sync/blocks_fetcher.go
@@ -450,7 +450,12 @@ func (f *blocksFetcher) fetchBlocksFromPeer(
 	for _, p := range peers {
 		blocks, err := f.requestBlocks(ctx, req, p)
 		if err != nil {
-			log.WithField("peer", p).WithError(err).Debug("Could not request blocks by range from peer")
+			log.WithFields(logrus.Fields{
+				"peer":      p,
+				"startSlot": req.StartSlot,
+				"count":     req.Count,
+				"step":      req.Step,
+			}).WithError(err).Debug("Could not request blocks by range from peer")
 			continue
 		}
 		f.p2p.Peers().Scorers().BlockProviderScorer().Touch(p)
--- a/beacon-chain/sync/rpc_send_request.go
+++ b/beacon-chain/sync/rpc_send_request.go
@@ -92,22 +92,72 @@ func SendBeaconBlocksByRangeRequest(
 		// The response MUST contain no more than `count` blocks, and no more than
 		// MAX_REQUEST_BLOCKS blocks.
 		currentEpoch := slots.ToEpoch(tor.CurrentSlot())
-		if i >= req.Count || i >= params.MaxRequestBlock(currentEpoch) {
+		maxBlocks := params.MaxRequestBlock(currentEpoch)
+		if i >= req.Count {
+			log.WithFields(logrus.Fields{
+				"blockIndex":     i,
+				"requestedCount": req.Count,
+				"blockSlot":      blk.Block().Slot(),
+				"peer":           pid,
+				"reason":         "exceeded requested count",
+			}).Debug("Peer returned invalid data: too many blocks")
+			return nil, ErrInvalidFetchedData
+		}
+		if i >= maxBlocks {
+			log.WithFields(logrus.Fields{
+				"blockIndex":   i,
+				"maxBlocks":    maxBlocks,
+				"currentEpoch": currentEpoch,
+				"blockSlot":    blk.Block().Slot(),
+				"peer":         pid,
+				"reason":       "exceeded MAX_REQUEST_BLOCKS",
+			}).Debug("Peer returned invalid data: exceeded protocol limit")
 			return nil, ErrInvalidFetchedData
 		}
 		// Returned blocks MUST be in the slot range [start_slot, start_slot + count * step).
-		if blk.Block().Slot() < req.StartSlot || blk.Block().Slot() >= req.StartSlot.Add(req.Count*req.Step) {
+		endSlot := req.StartSlot.Add(req.Count * req.Step)
+		if blk.Block().Slot() < req.StartSlot {
+			log.WithFields(logrus.Fields{
+				"blockSlot":      blk.Block().Slot(),
+				"requestedStart": req.StartSlot,
+				"peer":           pid,
+				"reason":         "block slot before requested start",
+			}).Debug("Peer returned invalid data: block too early")
+			return nil, ErrInvalidFetchedData
+		}
+		if blk.Block().Slot() >= endSlot {
+			log.WithFields(logrus.Fields{
+				"blockSlot":      blk.Block().Slot(),
+				"requestedStart": req.StartSlot,
+				"requestedEnd":   endSlot,
+				"requestedCount": req.Count,
+				"requestedStep":  req.Step,
+				"peer":           pid,
+				"reason":         "block slot >= start + count*step",
+			}).Debug("Peer returned invalid data: block beyond range")
 			return nil, ErrInvalidFetchedData
 		}
 		// Returned blocks, where they exist, MUST be sent in a consecutive order.
 		// Consecutive blocks MUST have values in `step` increments (slots may be skipped in between).
 		isSlotOutOfOrder := false
+		outOfOrderReason := ""
 		if prevSlot >= blk.Block().Slot() {
 			isSlotOutOfOrder = true
+			outOfOrderReason = "slot not increasing"
 		} else if req.Step != 0 && blk.Block().Slot().SubSlot(prevSlot).Mod(req.Step) != 0 {
 			isSlotOutOfOrder = true
+			slotDiff := blk.Block().Slot().SubSlot(prevSlot)
+			outOfOrderReason = fmt.Sprintf("slot diff %d not multiple of step %d", slotDiff, req.Step)
 		}
 		if !isFirstChunk && isSlotOutOfOrder {
+			log.WithFields(logrus.Fields{
+				"blockSlot":     blk.Block().Slot(),
+				"prevSlot":      prevSlot,
+				"requestedStep": req.Step,
+				"blockIndex":    i,
+				"peer":          pid,
+				"reason":        outOfOrderReason,
+			}).Debug("Peer returned invalid data: blocks out of order")
 			return nil, ErrInvalidFetchedData
 		}
 		prevSlot = blk.Block().Slot()