From 0f6070a866341ea53f1536b64a07b366a3bedc2c Mon Sep 17 00:00:00 2001 From: Potuz Date: Wed, 6 Aug 2025 14:18:13 -0300 Subject: [PATCH] Fix race on ReceibeBlock (#15565) * Fix race on ReceibeBlock In the event two routines for `ReceiveBlock` are triggered with the same block (it may happen if one routine is triggered over gossip and the other in init-sync) it may happen that the second routine believes it's syncing the block for the first time. This is because the cache on `blocksBeingSynced` is not checked to be set and the block may still not be put in forkchoice by the first routine. In the normal case this would not cause any trouble as the second forkchoice insertion is a noop by design. However, if the second routine times out or has any error in processing (for example the engine will return an error if we try to send FCU to an older head) then the second routine will attempt to remove the inserted block from forkchoice and this bricks the node since forkchoice refuses to remove a valid node, the root is removed inconditionally from db and the node ends up with a root that is not in db and remains in forkchoice. This PR just prevents the race. As a followup perhaps we can gate the rollback function from db to nodes that are effectively not in forkchoice, alternatively, force removal from forkchoice when rolling back from db (although this version is complicated due to possible accounting issues on forkchoice). * Fix lint --- beacon-chain/blockchain/currently_syncing_block.go | 7 ++++++- beacon-chain/blockchain/error.go | 2 ++ beacon-chain/blockchain/receive_block.go | 6 +++++- beacon-chain/blockchain/receive_block_test.go | 5 ++++- changelog/potuz_double_receive_block.md | 3 +++ 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 changelog/potuz_double_receive_block.md diff --git a/beacon-chain/blockchain/currently_syncing_block.go b/beacon-chain/blockchain/currently_syncing_block.go index c9f612b354..3c9ccd1edb 100644 --- a/beacon-chain/blockchain/currently_syncing_block.go +++ b/beacon-chain/blockchain/currently_syncing_block.go @@ -7,10 +7,15 @@ type currentlySyncingBlock struct { roots map[[32]byte]struct{} } -func (b *currentlySyncingBlock) set(root [32]byte) { +func (b *currentlySyncingBlock) set(root [32]byte) error { b.Lock() defer b.Unlock() + _, ok := b.roots[root] + if ok { + return errBlockBeingSynced + } b.roots[root] = struct{}{} + return nil } func (b *currentlySyncingBlock) unset(root [32]byte) { diff --git a/beacon-chain/blockchain/error.go b/beacon-chain/blockchain/error.go index a530f8adba..50f535f387 100644 --- a/beacon-chain/blockchain/error.go +++ b/beacon-chain/blockchain/error.go @@ -44,6 +44,8 @@ var ( errMaxBlobsExceeded = verification.AsVerificationFailure(errors.New("expected commitments in block exceeds MAX_BLOBS_PER_BLOCK")) // errMaxDataColumnsExceeded is returned when the number of data columns exceeds the maximum allowed. errMaxDataColumnsExceeded = verification.AsVerificationFailure(errors.New("expected data columns for node exceeds NUMBER_OF_COLUMNS")) + // errBlockBeingSynced is returned when a block is being synced. + errBlockBeingSynced = errors.New("block is being synced") ) // An invalid block is the block that fails state transition based on the core protocol rules. diff --git a/beacon-chain/blockchain/receive_block.go b/beacon-chain/blockchain/receive_block.go index e28235cc9c..c57ced1043 100644 --- a/beacon-chain/blockchain/receive_block.go +++ b/beacon-chain/blockchain/receive_block.go @@ -84,7 +84,11 @@ func (s *Service) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySig } receivedTime := time.Now() - s.blockBeingSynced.set(blockRoot) + err := s.blockBeingSynced.set(blockRoot) + if errors.Is(err, errBlockBeingSynced) { + log.WithField("blockRoot", fmt.Sprintf("%#x", blockRoot)).Debug("Ignoring block currently being synced") + return nil + } defer s.blockBeingSynced.unset(blockRoot) blockCopy, err := block.Copy() diff --git a/beacon-chain/blockchain/receive_block_test.go b/beacon-chain/blockchain/receive_block_test.go index 8e8f643029..0236a21f7c 100644 --- a/beacon-chain/blockchain/receive_block_test.go +++ b/beacon-chain/blockchain/receive_block_test.go @@ -311,7 +311,10 @@ func TestService_HasBlock(t *testing.T) { r, err = b.Block.HashTreeRoot() require.NoError(t, err) require.Equal(t, true, s.HasBlock(t.Context(), r)) - s.blockBeingSynced.set(r) + err = s.blockBeingSynced.set(r) + require.NoError(t, err) + err = s.blockBeingSynced.set(r) + require.ErrorIs(t, err, errBlockBeingSynced) require.Equal(t, false, s.HasBlock(t.Context(), r)) } diff --git a/changelog/potuz_double_receive_block.md b/changelog/potuz_double_receive_block.md new file mode 100644 index 0000000000..d55f5f5e19 --- /dev/null +++ b/changelog/potuz_double_receive_block.md @@ -0,0 +1,3 @@ +### Fixed + +- Prevent a race on double `ReceiveBlock`.