mirror of
https://github.com/OffchainLabs/prysm.git
synced 2026-01-09 07:28:06 -05:00
Fix race on ReceibeBlock (#15565)
* Fix race on ReceibeBlock In the event two routines for `ReceiveBlock` are triggered with the same block (it may happen if one routine is triggered over gossip and the other in init-sync) it may happen that the second routine believes it's syncing the block for the first time. This is because the cache on `blocksBeingSynced` is not checked to be set and the block may still not be put in forkchoice by the first routine. In the normal case this would not cause any trouble as the second forkchoice insertion is a noop by design. However, if the second routine times out or has any error in processing (for example the engine will return an error if we try to send FCU to an older head) then the second routine will attempt to remove the inserted block from forkchoice and this bricks the node since forkchoice refuses to remove a valid node, the root is removed inconditionally from db and the node ends up with a root that is not in db and remains in forkchoice. This PR just prevents the race. As a followup perhaps we can gate the rollback function from db to nodes that are effectively not in forkchoice, alternatively, force removal from forkchoice when rolling back from db (although this version is complicated due to possible accounting issues on forkchoice). * Fix lint
This commit is contained in:
@@ -7,10 +7,15 @@ type currentlySyncingBlock struct {
|
||||
roots map[[32]byte]struct{}
|
||||
}
|
||||
|
||||
func (b *currentlySyncingBlock) set(root [32]byte) {
|
||||
func (b *currentlySyncingBlock) set(root [32]byte) error {
|
||||
b.Lock()
|
||||
defer b.Unlock()
|
||||
_, ok := b.roots[root]
|
||||
if ok {
|
||||
return errBlockBeingSynced
|
||||
}
|
||||
b.roots[root] = struct{}{}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *currentlySyncingBlock) unset(root [32]byte) {
|
||||
|
||||
@@ -44,6 +44,8 @@ var (
|
||||
errMaxBlobsExceeded = verification.AsVerificationFailure(errors.New("expected commitments in block exceeds MAX_BLOBS_PER_BLOCK"))
|
||||
// errMaxDataColumnsExceeded is returned when the number of data columns exceeds the maximum allowed.
|
||||
errMaxDataColumnsExceeded = verification.AsVerificationFailure(errors.New("expected data columns for node exceeds NUMBER_OF_COLUMNS"))
|
||||
// errBlockBeingSynced is returned when a block is being synced.
|
||||
errBlockBeingSynced = errors.New("block is being synced")
|
||||
)
|
||||
|
||||
// An invalid block is the block that fails state transition based on the core protocol rules.
|
||||
|
||||
@@ -84,7 +84,11 @@ func (s *Service) ReceiveBlock(ctx context.Context, block interfaces.ReadOnlySig
|
||||
}
|
||||
|
||||
receivedTime := time.Now()
|
||||
s.blockBeingSynced.set(blockRoot)
|
||||
err := s.blockBeingSynced.set(blockRoot)
|
||||
if errors.Is(err, errBlockBeingSynced) {
|
||||
log.WithField("blockRoot", fmt.Sprintf("%#x", blockRoot)).Debug("Ignoring block currently being synced")
|
||||
return nil
|
||||
}
|
||||
defer s.blockBeingSynced.unset(blockRoot)
|
||||
|
||||
blockCopy, err := block.Copy()
|
||||
|
||||
@@ -311,7 +311,10 @@ func TestService_HasBlock(t *testing.T) {
|
||||
r, err = b.Block.HashTreeRoot()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, true, s.HasBlock(t.Context(), r))
|
||||
s.blockBeingSynced.set(r)
|
||||
err = s.blockBeingSynced.set(r)
|
||||
require.NoError(t, err)
|
||||
err = s.blockBeingSynced.set(r)
|
||||
require.ErrorIs(t, err, errBlockBeingSynced)
|
||||
require.Equal(t, false, s.HasBlock(t.Context(), r))
|
||||
}
|
||||
|
||||
|
||||
3
changelog/potuz_double_receive_block.md
Normal file
3
changelog/potuz_double_receive_block.md
Normal file
@@ -0,0 +1,3 @@
|
||||
### Fixed
|
||||
|
||||
- Prevent a race on double `ReceiveBlock`.
|
||||
Reference in New Issue
Block a user