Compare commits

...

3 Commits

Author   SHA1         Message                                                    Date
satushh  94e5b3e805   corrected logic                                            2025-11-20 20:28:49 -03:00
satushh  c0b8d9ca52   changelog                                                  2025-11-20 19:27:28 -03:00
satushh  b1eeb1b1f1   simple grace period (untested in kurtosis) + cgc metrics   2025-11-20 19:25:50 -03:00
4 changed files with 112 additions and 1 deletion

View File

@@ -28,6 +28,7 @@ func (s *Service) maintainCustodyInfo() {
func (s *Service) updateCustodyInfoIfNeeded() error {
	const minimumPeerCount = 1
	const gracePeriodSeconds = 300 // Grace period applied before CGC increases take effect, in seconds.

	// Get our actual custody group count.
	actualCustodyGroupCount, err := s.cfg.p2p.CustodyGroupCount(s.ctx)
@@ -35,12 +36,101 @@ func (s *Service) updateCustodyInfoIfNeeded() error {
		return errors.Wrap(err, "p2p custody group count")
	}

	// Update the P2P custody group count metric.
	custodyGroupCountP2P.Set(float64(actualCustodyGroupCount))

	// Get our target custody group count.
	targetCustodyGroupCount, err := s.custodyGroupCount(s.ctx)
	if err != nil {
		return errors.Wrap(err, "custody group count")
	}

	// Handle pending CGC changes with the grace period.
	s.pendingCGCLock.Lock()
	now := time.Now()

	switch {
	case s.pendingCGC > 0 && now.After(s.pendingCGCDeadline):
		// Grace period expired - check whether the pending change is still valid.
		targetToApply := s.pendingCGC
		s.pendingCGC = 0 // Clear the pending change.
		s.pendingCGCDeadline = time.Time{}
		s.pendingCGCLock.Unlock()

		// Only apply the pending change if the current target still justifies it.
		// This prevents applying stale increases when validators have been removed
		// or the configuration has changed during the grace period.
		if targetToApply <= targetCustodyGroupCount {
			// The pending value is still valid (at or below the current target):
			// the network still wants at least that many groups.
			// Use the current target to allow for any increases that happened during the grace period.
			if targetCustodyGroupCount > actualCustodyGroupCount {
				log.WithFields(logrus.Fields{
					"previousCGC": actualCustodyGroupCount,
					"newCGC":      targetCustodyGroupCount,
					"pendingCGC":  targetToApply,
				}).Info("Applying custody group count increase after grace period")
			}
		} else {
			// The pending value is higher than the current target - drop it as stale.
			log.WithFields(logrus.Fields{
				"currentCGC":      actualCustodyGroupCount,
				"targetCGC":       targetCustodyGroupCount,
				"stalePendingCGC": targetToApply,
			}).Info("Dropping stale pending CGC increase as target has decreased")

			// Still check whether the current target needs an increase (with a new grace period).
			if targetCustodyGroupCount > actualCustodyGroupCount {
				// Re-schedule with the current target and a new grace period.
				s.pendingCGCLock.Lock()
				s.pendingCGC = targetCustodyGroupCount
				s.pendingCGCDeadline = now.Add(time.Duration(gracePeriodSeconds) * time.Second)
				s.pendingCGCLock.Unlock()

				log.WithFields(logrus.Fields{
					"currentCGC":  actualCustodyGroupCount,
					"targetCGC":   targetCustodyGroupCount,
					"gracePeriod": gracePeriodSeconds,
				}).Info("Re-scheduling CGC increase with updated target")

				return nil
			}
		}
	case s.pendingCGC > 0 && !now.After(s.pendingCGCDeadline):
		// A pending change exists but the grace period has not expired - do nothing.
		pending := s.pendingCGC
		timeRemaining := s.pendingCGCDeadline.Sub(now).Seconds()
		s.pendingCGCLock.Unlock()

		log.WithFields(logrus.Fields{
			"pendingCGC":    pending,
			"timeRemaining": timeRemaining,
		}).Debug("Grace period still active, skipping CGC update")

		return nil
	default:
		// No pending change: check whether we need to schedule one.
		if targetCustodyGroupCount > actualCustodyGroupCount {
			// Schedule the increase with a grace period.
			s.pendingCGC = targetCustodyGroupCount
			s.pendingCGCDeadline = now.Add(time.Duration(gracePeriodSeconds) * time.Second)
			s.pendingCGCLock.Unlock()

			log.WithFields(logrus.Fields{
				"currentCGC":    actualCustodyGroupCount,
				"targetCGC":     targetCustodyGroupCount,
				"gracePeriod":   gracePeriodSeconds,
				"effectiveTime": s.pendingCGCDeadline.Format(time.RFC3339),
			}).Info("Scheduling custody group count increase with grace period")

			return nil
		}

		// No change needed.
		s.pendingCGCLock.Unlock()
	}

	// If the actual custody group count is already at or above the target, skip the update.
	if actualCustodyGroupCount >= targetCustodyGroupCount {
		return nil
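To make the three branches above easier to follow outside the surrounding service plumbing, here is a minimal, self-contained sketch of the same decision as a pure function. Every name in it (`pendingState`, `decideCGC`, the `cgcAction` constants) is hypothetical; the PR keeps this logic inline in `updateCustodyInfoIfNeeded` and guards the state with `pendingCGCLock`.

```go
package main

import (
	"fmt"
	"time"
)

// pendingState mirrors the pendingCGC / pendingCGCDeadline pair from the diff.
type pendingState struct {
	cgc      uint64
	deadline time.Time
}

type cgcAction int

const (
	applyNow cgcAction = iota // grace period elapsed and the increase is still wanted
	waiting                   // a pending increase exists but its deadline has not passed
	schedule                  // an increase was (re-)armed with a fresh grace period
	nothing                   // nothing to do
)

// decideCGC reproduces the three-way switch from the diff as a pure function.
func decideCGC(p *pendingState, actual, target uint64, now time.Time, grace time.Duration) cgcAction {
	switch {
	case p.cgc > 0 && now.After(p.deadline):
		stillValid := p.cgc <= target // the pending value is stale if the target shrank below it
		p.cgc, p.deadline = 0, time.Time{}
		if stillValid && target > actual {
			return applyNow
		}
		if !stillValid && target > actual {
			// Stale pending value, but an increase is still needed: restart the grace period.
			p.cgc, p.deadline = target, now.Add(grace)
			return schedule
		}
		return nothing
	case p.cgc > 0:
		return waiting
	default:
		if target > actual {
			p.cgc, p.deadline = target, now.Add(grace)
			return schedule
		}
		return nothing
	}
}

func main() {
	p := &pendingState{}
	fmt.Println(decideCGC(p, 4, 8, time.Now(), 5*time.Minute) == schedule) // true: the increase is deferred
}
```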
@@ -83,7 +173,7 @@ func (s *Service) updateCustodyInfoIfNeeded() error {
	// Update the p2p earliest available slot metric.
	earliestAvailableSlotP2P.Set(float64(storedEarliestSlot))

	dbEarliestSlot, _, err := s.cfg.beaconDB.UpdateCustodyInfo(s.ctx, storedEarliestSlot, storedGroupCount)
	dbEarliestSlot, dbStoredGroupCount, err := s.cfg.beaconDB.UpdateCustodyInfo(s.ctx, storedEarliestSlot, storedGroupCount)
	if err != nil {
		return errors.Wrap(err, "beacon db update custody info")
	}
@@ -91,6 +181,10 @@ func (s *Service) updateCustodyInfoIfNeeded() error {
	// Update the DB earliest available slot metric.
	earliestAvailableSlotDB.Set(float64(dbEarliestSlot))

	// Update both custody group count metrics with their respective values.
	custodyGroupCountP2P.Set(float64(storedGroupCount))
	custodyGroupCountDB.Set(float64(dbStoredGroupCount))

	return nil
}
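If the grace-period logic were factored out as in the sketch after the previous hunk, its corner cases (fresh scheduling, waiting, expiry, and a stale pending value) could be covered directly. The table-driven check below assumes that hypothetical `decideCGC` helper and its types live in the same package; none of it is part of the PR.

```go
package main

import (
	"testing"
	"time"
)

// TestDecideCGC exercises the hypothetical decideCGC helper sketched above.
func TestDecideCGC(t *testing.T) {
	now := time.Now()
	grace := 5 * time.Minute
	p := &pendingState{}

	if got := decideCGC(p, 4, 8, now, grace); got != schedule {
		t.Fatalf("first sight of a higher target: got %v, want schedule", got)
	}
	if got := decideCGC(p, 4, 8, now.Add(time.Minute), grace); got != waiting {
		t.Fatalf("inside the grace window: got %v, want waiting", got)
	}
	if got := decideCGC(p, 4, 8, now.Add(6*time.Minute), grace); got != applyNow {
		t.Fatalf("after the grace window: got %v, want applyNow", got)
	}

	// A pending value above the current target is dropped as stale and re-armed.
	p = &pendingState{cgc: 16, deadline: now}
	if got := decideCGC(p, 4, 8, now.Add(time.Second), grace); got != schedule {
		t.Fatalf("stale pending value: got %v, want schedule (re-armed with the new target)", got)
	}
}
```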

View File

@@ -241,6 +241,16 @@ var (
Name: "custody_earliest_available_slot_db",
Help: "The earliest available slot tracked by the database for custody purposes",
})
// Custody group count metrics - separate for P2P and DB views
custodyGroupCountP2P = promauto.NewGauge(prometheus.GaugeOpts{
Name: "beacon_custody_group_count_p2p",
Help: "Current custody group count (CGC) from P2P layer",
})
custodyGroupCountDB = promauto.NewGauge(prometheus.GaugeOpts{
Name: "beacon_custody_group_count_db",
Help: "Current custody group count (CGC) stored in database",
})
)
func (s *Service) updateMetrics() {
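Both new gauges are registered through promauto, so they attach to the default Prometheus registry and only need a /metrics handler to be scraped. A minimal, self-contained sketch of that pattern follows; the `example_` metric names and the port are placeholders, not values used by the PR.

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

var (
	cgcP2P = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "example_custody_group_count_p2p",
		Help: "CGC as seen by the P2P layer (illustrative stand-in).",
	})
	cgcDB = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "example_custody_group_count_db",
		Help: "CGC persisted in the database (illustrative stand-in).",
	})
)

func main() {
	// During the grace period the two views can legitimately diverge:
	// the P2P target has risen, but nothing has been persisted yet.
	cgcP2P.Set(8)
	cgcDB.Set(4)

	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":2112", nil))
}
```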

View File

@@ -182,6 +182,10 @@ type Service struct {
	dataColumnLogCh     chan dataColumnLogEntry
	digestActions       perDigestSet
	subscriptionSpawner func(func()) // see Service.spawn for details

	// Grace period fields for CGC changes.
	pendingCGC         uint64
	pendingCGCDeadline time.Time
	pendingCGCLock     sync.RWMutex
}
// NewService initializes new regular sync service.
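As a standalone illustration of the new fields, the sketch below wraps the same pending state in its own type; the type and method names are hypothetical. Note that the diff only ever takes the write lock, so a plain sync.Mutex would also suffice; sync.RWMutex simply leaves room for read-only callers such as metrics or logging.

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// pendingCGCState is an illustrative standalone version of the three fields
// added to Service; the real code keeps them inline on the sync Service.
type pendingCGCState struct {
	mu       sync.RWMutex
	cgc      uint64
	deadline time.Time
}

// schedule arms a pending increase with a fresh grace period.
func (p *pendingCGCState) schedule(target uint64, grace time.Duration) {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.cgc = target
	p.deadline = time.Now().Add(grace)
}

// snapshot is a read-only view, suitable for logging or metrics.
func (p *pendingCGCState) snapshot() (uint64, time.Time) {
	p.mu.RLock()
	defer p.mu.RUnlock()
	return p.cgc, p.deadline
}

func main() {
	var p pendingCGCState
	p.schedule(8, 5*time.Minute)
	cgc, deadline := p.snapshot()
	fmt.Printf("pending CGC %d until %s\n", cgc, deadline.Format(time.RFC3339))
}
```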

View File

@@ -0,0 +1,3 @@
### Added

- Grace period for custody group count (CGC) increases before they are applied.