Add state diff serialization (#15250)

* Add serialization code for state diffs

Adds serialization code for state diffs.
Adds code to create and apply state diffs
Adds fuzz tests and benchmarks for serialization/deserialization

Co-authored-by: Claude <noreply@anthropic.com>

* Add Fulu support

* Review #1

* gazelle

* Fix some fuzzers

* Failing cases from the fuzzers in consensus-types/hdiff

* Fix more fuzz tests

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

* add comparison tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

* Use ConvertToElectra in UpgradeToElectra

* Add comments on constants

* Fix readEth1Data

* remove colons from error messages

* Add design doc

* Apply suggestions from code review

Bast

Co-authored-by: Bastin <43618253+Inspector-Butters@users.noreply.github.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Preston Van Loon <preston@pvl.dev>
Co-authored-by: Bastin <43618253+Inspector-Butters@users.noreply.github.com>
This commit is contained in:
Potuz
2025-10-20 18:52:32 -03:00
committed by GitHub
parent a3baf98b05
commit 426fbcc3b0
25 changed files with 6359 additions and 169 deletions

View File

@@ -0,0 +1,57 @@
load("@prysm//tools/go:def.bzl", "go_library", "go_test")
go_library(
name = "go_default_library",
srcs = ["state_diff.go"],
importpath = "github.com/OffchainLabs/prysm/v6/consensus-types/hdiff",
visibility = ["//visibility:public"],
deps = [
"//beacon-chain/core/altair:go_default_library",
"//beacon-chain/core/capella:go_default_library",
"//beacon-chain/core/deneb:go_default_library",
"//beacon-chain/core/electra:go_default_library",
"//beacon-chain/core/execution:go_default_library",
"//beacon-chain/core/fulu:go_default_library",
"//beacon-chain/state:go_default_library",
"//config/fieldparams:go_default_library",
"//consensus-types/blocks:go_default_library",
"//consensus-types/helpers:go_default_library",
"//consensus-types/interfaces:go_default_library",
"//consensus-types/primitives:go_default_library",
"//proto/engine/v1:go_default_library",
"//proto/prysm/v1alpha1:go_default_library",
"//runtime/version:go_default_library",
"@com_github_golang_snappy//:go_default_library",
"@com_github_pkg_errors//:go_default_library",
"@com_github_prysmaticlabs_fastssz//:go_default_library",
"@com_github_prysmaticlabs_go_bitfield//:go_default_library",
"@com_github_sirupsen_logrus//:go_default_library",
"@org_golang_google_protobuf//proto:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = [
"fuzz_test.go",
"property_test.go",
"security_test.go",
"state_diff_test.go",
],
data = glob(["testdata/**"]),
embed = [":go_default_library"],
deps = [
"//beacon-chain/core/transition:go_default_library",
"//beacon-chain/state:go_default_library",
"//beacon-chain/state/state-native:go_default_library",
"//config/fieldparams:go_default_library",
"//consensus-types/blocks:go_default_library",
"//consensus-types/primitives:go_default_library",
"//proto/prysm/v1alpha1:go_default_library",
"//runtime/version:go_default_library",
"//testing/require:go_default_library",
"//testing/util:go_default_library",
"@com_github_golang_snappy//:go_default_library",
"@com_github_pkg_errors//:go_default_library",
],
)

Binary file not shown.

After

Width:  |  Height:  |  Size: 383 KiB

View File

@@ -0,0 +1,636 @@
package hdiff
import (
"context"
"encoding/binary"
"strconv"
"strings"
"testing"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v6/testing/util"
)
const maxFuzzValidators = 10000
const maxFuzzStateDiffSize = 1000
const maxFuzzHistoricalRoots = 10000
const maxFuzzDecodedSize = maxFuzzStateDiffSize * 10
const maxFuzzScanRange = 200
const fuzzRootsLengthOffset = 16
const maxFuzzInputSize = 10
const oneEthInGwei = 1000000000
// FuzzNewHdiff tests parsing variations of realistic diffs
func FuzzNewHdiff(f *testing.F) {
// Add seed corpus with various valid diffs from realistic scenarios
sizes := []uint64{8, 16, 32}
for _, size := range sizes {
source, _ := util.DeterministicGenesisStateElectra(f, size)
// Create various realistic target states
scenarios := []string{"slot_change", "balance_change", "validator_change", "multiple_changes"}
for _, scenario := range scenarios {
target := source.Copy()
switch scenario {
case "slot_change":
_ = target.SetSlot(source.Slot() + 1)
case "balance_change":
balances := target.Balances()
if len(balances) > 0 {
balances[0] += 1000000000
_ = target.SetBalances(balances)
}
case "validator_change":
validators := target.Validators()
if len(validators) > 0 {
validators[0].EffectiveBalance += 1000000000
_ = target.SetValidators(validators)
}
case "multiple_changes":
_ = target.SetSlot(source.Slot() + 5)
balances := target.Balances()
validators := target.Validators()
if len(balances) > 0 && len(validators) > 0 {
balances[0] += 2000000000
validators[0].EffectiveBalance += 1000000000
_ = target.SetBalances(balances)
_ = target.SetValidators(validators)
}
}
validDiff, err := Diff(source, target)
if err == nil {
f.Add(validDiff.StateDiff, validDiff.ValidatorDiffs, validDiff.BalancesDiff)
}
}
}
f.Fuzz(func(t *testing.T, stateDiff, validatorDiffs, balancesDiff []byte) {
// Limit input sizes to reasonable bounds
if len(stateDiff) > 5000 || len(validatorDiffs) > 5000 || len(balancesDiff) > 5000 {
return
}
// Bound historical roots length in stateDiff (if it contains snappy-compressed data)
// The historicalRootsLength is read after snappy decompression, but we can still
// limit the compressed input size to prevent extreme decompression ratios
if len(stateDiff) > maxFuzzStateDiffSize {
// Limit stateDiff to prevent potential memory bombs from snappy decompression
stateDiff = stateDiff[:maxFuzzStateDiffSize]
}
// Bound validator count in validatorDiffs
if len(validatorDiffs) >= 8 {
count := binary.LittleEndian.Uint64(validatorDiffs[0:8])
if count >= maxFuzzValidators {
boundedCount := count % maxFuzzValidators
binary.LittleEndian.PutUint64(validatorDiffs[0:8], boundedCount)
}
}
// Bound balance count in balancesDiff
if len(balancesDiff) >= 8 {
count := binary.LittleEndian.Uint64(balancesDiff[0:8])
if count >= maxFuzzValidators {
boundedCount := count % maxFuzzValidators
binary.LittleEndian.PutUint64(balancesDiff[0:8], boundedCount)
}
}
input := HdiffBytes{
StateDiff: stateDiff,
ValidatorDiffs: validatorDiffs,
BalancesDiff: balancesDiff,
}
// Test parsing - should not panic even with corrupted but bounded data
_, err := newHdiff(input)
_ = err // Expected to fail with corrupted data
})
}
// FuzzNewStateDiff tests the newStateDiff function with valid random state diffs
func FuzzNewStateDiff(f *testing.F) {
f.Fuzz(func(t *testing.T, validatorCount uint8, slotDelta uint64, balanceData []byte, validatorData []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("newStateDiff panicked: %v", r)
}
}()
// Bound validator count to reasonable range
validators := uint64(validatorCount%32 + 8) // 8-39 validators
if slotDelta > 100 {
slotDelta = slotDelta % 100
}
// Generate random source state
source, _ := util.DeterministicGenesisStateElectra(t, validators)
target := source.Copy()
// Apply random slot change
_ = target.SetSlot(source.Slot() + primitives.Slot(slotDelta))
// Apply random balance changes
if len(balanceData) >= 8 {
balances := target.Balances()
numChanges := int(binary.LittleEndian.Uint64(balanceData[:8])) % len(balances)
for i := 0; i < numChanges && i*8+8 < len(balanceData); i++ {
idx := i % len(balances)
delta := int64(binary.LittleEndian.Uint64(balanceData[i*8+8:(i+1)*8+8]))
// Keep delta reasonable
delta = delta % oneEthInGwei // Max 1 ETH change
if delta < 0 && uint64(-delta) > balances[idx] {
balances[idx] = 0
} else if delta < 0 {
balances[idx] -= uint64(-delta)
} else {
balances[idx] += uint64(delta)
}
}
_ = target.SetBalances(balances)
}
// Apply random validator changes
if len(validatorData) > 0 {
validators := target.Validators()
numChanges := int(validatorData[0]) % len(validators)
for i := 0; i < numChanges && i < len(validatorData)-1; i++ {
idx := i % len(validators)
if validatorData[i+1]%2 == 0 {
validators[idx].EffectiveBalance += oneEthInGwei // 1 ETH
}
}
_ = target.SetValidators(validators)
}
// Create diff between source and target
diff, err := Diff(source, target)
if err != nil {
return // Skip if diff creation fails
}
// Test newStateDiff with the valid serialized diff from StateDiff field
reconstructed, err := newStateDiff(diff.StateDiff)
if err != nil {
t.Errorf("newStateDiff failed on valid diff: %v", err)
return
}
// Basic validation that reconstruction worked
if reconstructed == nil {
t.Error("newStateDiff returned nil without error")
}
})
}
// FuzzNewValidatorDiffs tests validator diff deserialization with valid diffs
func FuzzNewValidatorDiffs(f *testing.F) {
f.Fuzz(func(t *testing.T, validatorCount uint8, changeData []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("newValidatorDiffs panicked: %v", r)
}
}()
// Bound validator count to reasonable range
validators := uint64(validatorCount%16 + 4) // 4-19 validators
// Generate random source state
source, _ := util.DeterministicGenesisStateElectra(t, validators)
target := source.Copy()
// Apply random validator changes based on changeData
if len(changeData) > 0 {
vals := target.Validators()
numChanges := int(changeData[0]) % len(vals)
for i := 0; i < numChanges && i < len(changeData)-1; i++ {
idx := i % len(vals)
changeType := changeData[i+1] % 4
switch changeType {
case 0: // Change effective balance
vals[idx].EffectiveBalance += oneEthInGwei
case 1: // Toggle slashed status
vals[idx].Slashed = !vals[idx].Slashed
case 2: // Change activation epoch
vals[idx].ActivationEpoch++
case 3: // Change exit epoch
vals[idx].ExitEpoch++
}
}
_ = target.SetValidators(vals)
}
// Create diff between source and target
diff, err := Diff(source, target)
if err != nil {
return // Skip if diff creation fails
}
// Test newValidatorDiffs with the valid serialized diff
reconstructed, err := newValidatorDiffs(diff.ValidatorDiffs)
if err != nil {
t.Errorf("newValidatorDiffs failed on valid diff: %v", err)
return
}
// Basic validation that reconstruction worked
if reconstructed == nil {
t.Error("newValidatorDiffs returned nil without error")
}
})
}
// FuzzNewBalancesDiff tests balance diff deserialization with valid diffs
func FuzzNewBalancesDiff(f *testing.F) {
f.Fuzz(func(t *testing.T, balanceCount uint8, balanceData []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("newBalancesDiff panicked: %v", r)
}
}()
// Bound balance count to reasonable range
numBalances := int(balanceCount%32 + 8) // 8-39 balances
// Generate simple source state
source, _ := util.DeterministicGenesisStateElectra(t, uint64(numBalances))
target := source.Copy()
// Apply random balance changes based on balanceData
if len(balanceData) >= 8 {
balances := target.Balances()
numChanges := int(binary.LittleEndian.Uint64(balanceData[:8])) % numBalances
for i := 0; i < numChanges && i*8+8 < len(balanceData); i++ {
idx := i % numBalances
delta := int64(binary.LittleEndian.Uint64(balanceData[i*8+8:(i+1)*8+8]))
// Keep delta reasonable
delta = delta % oneEthInGwei // Max 1 ETH change
if delta < 0 && uint64(-delta) > balances[idx] {
balances[idx] = 0
} else if delta < 0 {
balances[idx] -= uint64(-delta)
} else {
balances[idx] += uint64(delta)
}
}
_ = target.SetBalances(balances)
}
// Create diff between source and target to get BalancesDiff
diff, err := Diff(source, target)
if err != nil {
return // Skip if diff creation fails
}
// Test newBalancesDiff with the valid serialized diff
reconstructed, err := newBalancesDiff(diff.BalancesDiff)
if err != nil {
t.Errorf("newBalancesDiff failed on valid diff: %v", err)
return
}
// Basic validation that reconstruction worked
if reconstructed == nil {
t.Error("newBalancesDiff returned nil without error")
}
})
}
// FuzzApplyDiff tests applying variations of valid diffs
func FuzzApplyDiff(f *testing.F) {
// Test with realistic state variations, not random data
ctx := context.Background()
// Add seed corpus with various valid scenarios
sizes := []uint64{8, 16, 32, 64}
for _, size := range sizes {
source, _ := util.DeterministicGenesisStateElectra(f, size)
target := source.Copy()
// Different types of realistic changes
scenarios := []func(){
func() { _ = target.SetSlot(source.Slot() + 1) }, // Slot change
func() { // Balance change
balances := target.Balances()
if len(balances) > 0 {
balances[0] += 1000000000 // 1 ETH
_ = target.SetBalances(balances)
}
},
func() { // Validator change
validators := target.Validators()
if len(validators) > 0 {
validators[0].EffectiveBalance += 1000000000
_ = target.SetValidators(validators)
}
},
}
for _, scenario := range scenarios {
testTarget := source.Copy()
scenario()
validDiff, err := Diff(source, testTarget)
if err == nil {
f.Add(validDiff.StateDiff, validDiff.ValidatorDiffs, validDiff.BalancesDiff)
}
}
}
f.Fuzz(func(t *testing.T, stateDiff, validatorDiffs, balancesDiff []byte) {
// Only test with reasonable sized inputs
if len(stateDiff) > 10000 || len(validatorDiffs) > 10000 || len(balancesDiff) > 10000 {
return
}
// Bound historical roots length in stateDiff (same as FuzzNewHdiff)
if len(stateDiff) > maxFuzzStateDiffSize {
stateDiff = stateDiff[:maxFuzzStateDiffSize]
}
// Bound validator count in validatorDiffs
if len(validatorDiffs) >= 8 {
count := binary.LittleEndian.Uint64(validatorDiffs[0:8])
if count >= maxFuzzValidators {
boundedCount := count % maxFuzzValidators
binary.LittleEndian.PutUint64(validatorDiffs[0:8], boundedCount)
}
}
// Bound balance count in balancesDiff
if len(balancesDiff) >= 8 {
count := binary.LittleEndian.Uint64(balancesDiff[0:8])
if count >= maxFuzzValidators {
boundedCount := count % maxFuzzValidators
binary.LittleEndian.PutUint64(balancesDiff[0:8], boundedCount)
}
}
// Create fresh source state for each test
source, _ := util.DeterministicGenesisStateElectra(t, 8)
diff := HdiffBytes{
StateDiff: stateDiff,
ValidatorDiffs: validatorDiffs,
BalancesDiff: balancesDiff,
}
// Apply diff - errors are expected for fuzzed data
_, err := ApplyDiff(ctx, source, diff)
_ = err // Expected to fail with invalid data
})
}
// FuzzReadPendingAttestation tests the pending attestation deserialization
func FuzzReadPendingAttestation(f *testing.F) {
// Add edge cases - this function is particularly vulnerable
f.Add([]byte{})
f.Add([]byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}) // 8 bytes
f.Add(make([]byte, 200)) // Larger than expected
// Add a case with large reported length
largeLength := make([]byte, 8)
binary.LittleEndian.PutUint64(largeLength, 0xFFFFFFFF) // Large bits length
f.Add(largeLength)
f.Fuzz(func(t *testing.T, data []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("readPendingAttestation panicked: %v", r)
}
}()
// Make a copy since the function modifies the slice
dataCopy := make([]byte, len(data))
copy(dataCopy, data)
// Bound the bits length by modifying the first 8 bytes if they exist
if len(dataCopy) >= 8 {
// Read the bits length and bound it to maxFuzzValidators
bitsLength := binary.LittleEndian.Uint64(dataCopy[0:8])
if bitsLength >= maxFuzzValidators {
boundedLength := bitsLength % maxFuzzValidators
binary.LittleEndian.PutUint64(dataCopy[0:8], boundedLength)
}
}
_, err := readPendingAttestation(&dataCopy)
_ = err
})
}
// FuzzKmpIndex tests the KMP algorithm implementation
func FuzzKmpIndex(f *testing.F) {
// Test with integer pointers to match the actual usage
f.Add("1,2,3", "4,5,6")
f.Add("1,2,3", "1,2,3")
f.Add("", "1,2,3")
f.Add("1,1,1", "2,2,2")
f.Fuzz(func(t *testing.T, sourceStr string, targetStr string) {
defer func() {
if r := recover(); r != nil {
t.Errorf("kmpIndex panicked: %v", r)
}
}()
// Parse comma-separated strings into int slices
var source, target []int
if sourceStr != "" {
for _, s := range strings.Split(sourceStr, ",") {
if val, err := strconv.Atoi(strings.TrimSpace(s)); err == nil {
source = append(source, val)
}
}
}
if targetStr != "" {
for _, s := range strings.Split(targetStr, ",") {
if val, err := strconv.Atoi(strings.TrimSpace(s)); err == nil {
target = append(target, val)
}
}
}
// Maintain the precondition: concatenate target with source
// This matches how kmpIndex is actually called in production
combined := make([]int, len(target)+len(source))
copy(combined, target)
copy(combined[len(target):], source)
// Convert to pointer slices as used in actual code
combinedPtrs := make([]*int, len(combined))
for i := range combined {
val := combined[i]
combinedPtrs[i] = &val
}
integerEquals := func(a, b *int) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
return *a == *b
}
result := kmpIndex(len(source), combinedPtrs, integerEquals)
// Basic sanity check: result should be in [0, len(source)]
if result < 0 || result > len(source) {
t.Errorf("kmpIndex returned invalid result: %d for source length=%d", result, len(source))
}
})
}
// FuzzComputeLPS tests the LPS computation for KMP
func FuzzComputeLPS(f *testing.F) {
// Add seed cases
f.Add("1,2,1")
f.Add("1,1,1")
f.Add("1,2,3,4")
f.Add("")
f.Fuzz(func(t *testing.T, patternStr string) {
defer func() {
if r := recover(); r != nil {
t.Errorf("computeLPS panicked: %v", r)
}
}()
// Parse comma-separated string into int slice
var pattern []int
if patternStr != "" {
for _, s := range strings.Split(patternStr, ",") {
if val, err := strconv.Atoi(strings.TrimSpace(s)); err == nil {
pattern = append(pattern, val)
}
}
}
// Convert to pointer slice
patternPtrs := make([]*int, len(pattern))
for i := range pattern {
val := pattern[i]
patternPtrs[i] = &val
}
integerEquals := func(a, b *int) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
return *a == *b
}
result := computeLPS(patternPtrs, integerEquals)
// Verify result length matches input
if len(result) != len(pattern) {
t.Errorf("computeLPS returned wrong length: got %d, expected %d", len(result), len(pattern))
}
// Verify all LPS values are non-negative and within bounds
for i, lps := range result {
if lps < 0 || lps > i {
t.Errorf("Invalid LPS value at index %d: %d", i, lps)
}
}
})
}
// FuzzDiffToBalances tests balance diff computation
func FuzzDiffToBalances(f *testing.F) {
f.Fuzz(func(t *testing.T, sourceData, targetData []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("diffToBalances panicked: %v", r)
}
}()
// Convert byte data to balance arrays
var sourceBalances, targetBalances []uint64
// Parse source balances (8 bytes per uint64)
for i := 0; i+7 < len(sourceData) && len(sourceBalances) < 100; i += 8 {
balance := binary.LittleEndian.Uint64(sourceData[i : i+8])
sourceBalances = append(sourceBalances, balance)
}
// Parse target balances
for i := 0; i+7 < len(targetData) && len(targetBalances) < 100; i += 8 {
balance := binary.LittleEndian.Uint64(targetData[i : i+8])
targetBalances = append(targetBalances, balance)
}
// Create states with the provided balances
source, _ := util.DeterministicGenesisStateElectra(t, 1)
target, _ := util.DeterministicGenesisStateElectra(t, 1)
if len(sourceBalances) > 0 {
_ = source.SetBalances(sourceBalances)
}
if len(targetBalances) > 0 {
_ = target.SetBalances(targetBalances)
}
result, err := diffToBalances(source, target)
// If no error, verify result consistency
if err == nil && len(result) > 0 {
// Result length should match target length
if len(result) != len(target.Balances()) {
t.Errorf("diffToBalances result length mismatch: got %d, expected %d",
len(result), len(target.Balances()))
}
}
})
}
// FuzzValidatorsEqual tests validator comparison
func FuzzValidatorsEqual(f *testing.F) {
f.Fuzz(func(t *testing.T, data []byte) {
defer func() {
if r := recover(); r != nil {
t.Errorf("validatorsEqual panicked: %v", r)
}
}()
// Create two validators and fuzz their fields
if len(data) < 16 {
return
}
source, _ := util.DeterministicGenesisStateElectra(t, 2)
validators := source.Validators()
if len(validators) < 2 {
return
}
val1 := validators[0]
val2 := validators[1]
// Modify validator fields based on fuzz data
if len(data) > 0 && data[0]%2 == 0 {
val2.EffectiveBalance = val1.EffectiveBalance + uint64(data[0])
}
if len(data) > 1 && data[1]%2 == 0 {
val2.Slashed = !val1.Slashed
}
// Create ReadOnlyValidator wrappers if needed
// Since validatorsEqual expects ReadOnlyValidator interface,
// we'll skip this test for now as it requires state wrapper implementation
_ = val1
_ = val2
})
}

View File

@@ -0,0 +1,403 @@
package hdiff
import (
"encoding/binary"
"math"
"testing"
"time"
"github.com/OffchainLabs/prysm/v6/consensus-types/primitives"
"github.com/OffchainLabs/prysm/v6/testing/require"
"github.com/OffchainLabs/prysm/v6/testing/util"
)
// maxSafeBalance ensures balances can be safely cast to int64 for diff computation
const maxSafeBalance = 1<<52 - 1
// PropertyTestRoundTrip verifies that diff->apply is idempotent with realistic data
func FuzzPropertyRoundTrip(f *testing.F) {
f.Fuzz(func(t *testing.T, slotDelta uint64, balanceData []byte, validatorData []byte) {
// Limit to realistic ranges
if slotDelta > 32 { // Max one epoch
slotDelta = slotDelta % 32
}
// Convert byte data to realistic deltas and changes
var balanceDeltas []int64
var validatorChanges []bool
// Parse balance deltas - limit to realistic amounts (8 bytes per int64)
for i := 0; i+7 < len(balanceData) && len(balanceDeltas) < 20; i += 8 {
delta := int64(binary.LittleEndian.Uint64(balanceData[i : i+8]))
// Keep deltas realistic (max 10 ETH change)
if delta > 10000000000 {
delta = delta % 10000000000
}
if delta < -10000000000 {
delta = -((-delta) % 10000000000)
}
balanceDeltas = append(balanceDeltas, delta)
}
// Parse validator changes (1 byte per bool) - limit to small number
for i := 0; i < len(validatorData) && len(validatorChanges) < 10; i++ {
validatorChanges = append(validatorChanges, validatorData[i]%2 == 0)
}
ctx := t.Context()
// Create source state with reasonable size
validatorCount := uint64(len(validatorChanges) + 8) // Minimum 8 validators
if validatorCount > 64 {
validatorCount = 64 // Cap at 64 for performance
}
source, _ := util.DeterministicGenesisStateElectra(t, validatorCount)
// Create target state with modifications
target := source.Copy()
// Apply slot change
_ = target.SetSlot(source.Slot() + primitives.Slot(slotDelta))
// Apply realistic balance changes
if len(balanceDeltas) > 0 {
balances := target.Balances()
for i, delta := range balanceDeltas {
if i >= len(balances) {
break
}
// Apply realistic balance changes with safe bounds
if delta < 0 {
if uint64(-delta) > balances[i] {
balances[i] = 0 // Can't go below 0
} else {
balances[i] -= uint64(-delta)
}
} else {
// Cap at reasonable maximum (1000 ETH)
maxBalance := uint64(1000000000000) // 1000 ETH in Gwei
if balances[i]+uint64(delta) > maxBalance {
balances[i] = maxBalance
} else {
balances[i] += uint64(delta)
}
}
}
_ = target.SetBalances(balances)
}
// Apply realistic validator changes
if len(validatorChanges) > 0 {
validators := target.Validators()
for i, shouldChange := range validatorChanges {
if i >= len(validators) {
break
}
if shouldChange {
// Make realistic changes - small effective balance adjustments
validators[i].EffectiveBalance += 1000000000 // 1 ETH
}
}
_ = target.SetValidators(validators)
}
// Create diff
diff, err := Diff(source, target)
if err != nil {
// If diff creation fails, that's acceptable for malformed inputs
return
}
// Apply diff
result, err := ApplyDiff(ctx, source, diff)
if err != nil {
// If diff application fails, that's acceptable
return
}
// Verify round-trip property: source + diff = target
require.Equal(t, target.Slot(), result.Slot())
// Verify balance consistency
targetBalances := target.Balances()
resultBalances := result.Balances()
require.Equal(t, len(targetBalances), len(resultBalances))
for i := range targetBalances {
require.Equal(t, targetBalances[i], resultBalances[i], "Balance mismatch at index %d", i)
}
// Verify validator consistency
targetVals := target.Validators()
resultVals := result.Validators()
require.Equal(t, len(targetVals), len(resultVals))
for i := range targetVals {
require.Equal(t, targetVals[i].Slashed, resultVals[i].Slashed, "Validator slashing mismatch at index %d", i)
require.Equal(t, targetVals[i].EffectiveBalance, resultVals[i].EffectiveBalance, "Validator balance mismatch at index %d", i)
}
})
}
// PropertyTestReasonablePerformance verifies operations complete quickly with realistic data
func FuzzPropertyResourceBounds(f *testing.F) {
f.Fuzz(func(t *testing.T, validatorCount uint8, slotDelta uint8, changeCount uint8) {
// Use realistic parameters
validators := uint64(validatorCount%64 + 8) // 8-71 validators
slots := uint64(slotDelta % 32) // 0-31 slots
changes := int(changeCount % 10) // 0-9 changes
// Create realistic states
source, _ := util.DeterministicGenesisStateElectra(t, validators)
target := source.Copy()
// Apply realistic changes
_ = target.SetSlot(source.Slot() + primitives.Slot(slots))
if changes > 0 {
validatorList := target.Validators()
for i := 0; i < changes && i < len(validatorList); i++ {
validatorList[i].EffectiveBalance += 1000000000 // 1 ETH
}
_ = target.SetValidators(validatorList)
}
// Operations should complete quickly
start := time.Now()
diff, err := Diff(source, target)
duration := time.Since(start)
if err == nil {
// Should be fast
require.Equal(t, true, duration < time.Second, "Diff creation too slow: %v", duration)
// Apply should also be fast
start = time.Now()
_, err = ApplyDiff(t.Context(), source, diff)
duration = time.Since(start)
if err == nil {
require.Equal(t, true, duration < time.Second, "Diff application too slow: %v", duration)
}
}
})
}
// PropertyTestDiffSize verifies that diffs are smaller than full states for typical cases
func FuzzPropertyDiffEfficiency(f *testing.F) {
f.Fuzz(func(t *testing.T, slotDelta uint64, numChanges uint8) {
if slotDelta > 100 {
slotDelta = slotDelta % 100
}
if numChanges > 10 {
numChanges = numChanges % 10
}
// Create states with small differences
source, _ := util.DeterministicGenesisStateElectra(t, 64)
target := source.Copy()
_ = target.SetSlot(source.Slot() + primitives.Slot(slotDelta))
// Make a few small changes
if numChanges > 0 {
validators := target.Validators()
for i := uint8(0); i < numChanges && int(i) < len(validators); i++ {
validators[i].EffectiveBalance += 1000
}
_ = target.SetValidators(validators)
}
// Create diff
diff, err := Diff(source, target)
if err != nil {
return
}
// For small changes, diff should be much smaller than full state
sourceSSZ, err := source.MarshalSSZ()
if err != nil {
return
}
diffSize := len(diff.StateDiff) + len(diff.ValidatorDiffs) + len(diff.BalancesDiff)
// Diff should be smaller than full state for small changes
if numChanges <= 5 && slotDelta <= 10 {
require.Equal(t, true, diffSize < len(sourceSSZ)/2,
"Diff size %d should be less than half of state size %d", diffSize, len(sourceSSZ))
}
})
}
// PropertyTestBalanceConservation verifies that balance operations don't create/destroy value unexpectedly
func FuzzPropertyBalanceConservation(f *testing.F) {
f.Fuzz(func(t *testing.T, balanceData []byte) {
// Convert byte data to balance changes, bounded to safe range
var balanceChanges []int64
for i := 0; i+7 < len(balanceData) && len(balanceChanges) < 50; i += 8 {
rawChange := int64(binary.LittleEndian.Uint64(balanceData[i : i+8]))
// Bound the change to ensure resulting balances stay within safe range
change := rawChange % (maxSafeBalance / 2) // Divide by 2 to allow for addition/subtraction
balanceChanges = append(balanceChanges, change)
}
source, _ := util.DeterministicGenesisStateElectra(t, uint64(len(balanceChanges)+10))
originalBalances := source.Balances()
// Ensure initial balances are within safe range for int64 casting
for i, balance := range originalBalances {
if balance > maxSafeBalance {
originalBalances[i] = balance % maxSafeBalance
}
}
_ = source.SetBalances(originalBalances)
// Calculate total before
var totalBefore uint64
for _, balance := range originalBalances {
totalBefore += balance
}
// Apply balance changes via diff system
target := source.Copy()
targetBalances := target.Balances()
var totalDelta int64
for i, delta := range balanceChanges {
if i >= len(targetBalances) {
break
}
// Prevent underflow
if delta < 0 && uint64(-delta) > targetBalances[i] {
totalDelta -= int64(targetBalances[i]) // Actually lost amount (negative)
targetBalances[i] = 0
} else if delta < 0 {
targetBalances[i] -= uint64(-delta)
totalDelta += delta
} else {
// Prevent overflow
if uint64(delta) > math.MaxUint64-targetBalances[i] {
gained := math.MaxUint64 - targetBalances[i]
totalDelta += int64(gained)
targetBalances[i] = math.MaxUint64
} else {
targetBalances[i] += uint64(delta)
totalDelta += delta
}
}
}
_ = target.SetBalances(targetBalances)
// Apply through diff system
diff, err := Diff(source, target)
if err != nil {
return
}
result, err := ApplyDiff(t.Context(), source, diff)
if err != nil {
return
}
// Calculate total after
resultBalances := result.Balances()
var totalAfter uint64
for _, balance := range resultBalances {
totalAfter += balance
}
// Verify conservation (accounting for intended changes)
expectedTotal := totalBefore
if totalDelta >= 0 {
expectedTotal += uint64(totalDelta)
} else {
if uint64(-totalDelta) <= expectedTotal {
expectedTotal -= uint64(-totalDelta)
} else {
expectedTotal = 0
}
}
require.Equal(t, expectedTotal, totalAfter,
"Balance conservation violated: before=%d, delta=%d, expected=%d, actual=%d",
totalBefore, totalDelta, expectedTotal, totalAfter)
})
}
// PropertyTestMonotonicSlot verifies slot only increases
func FuzzPropertyMonotonicSlot(f *testing.F) {
f.Fuzz(func(t *testing.T, slotDelta uint64) {
source, _ := util.DeterministicGenesisStateElectra(t, 16)
target := source.Copy()
targetSlot := source.Slot() + primitives.Slot(slotDelta)
_ = target.SetSlot(targetSlot)
diff, err := Diff(source, target)
if err != nil {
return
}
result, err := ApplyDiff(t.Context(), source, diff)
if err != nil {
return
}
// Slot should never decrease
require.Equal(t, true, result.Slot() >= source.Slot(),
"Slot decreased from %d to %d", source.Slot(), result.Slot())
// Slot should match target
require.Equal(t, targetSlot, result.Slot())
})
}
// PropertyTestValidatorIndexIntegrity verifies validator indices remain consistent
func FuzzPropertyValidatorIndices(f *testing.F) {
f.Fuzz(func(t *testing.T, changeData []byte) {
// Convert byte data to boolean changes
var changes []bool
for i := 0; i < len(changeData) && len(changes) < 20; i++ {
changes = append(changes, changeData[i]%2 == 0)
}
source, _ := util.DeterministicGenesisStateElectra(t, uint64(len(changes)+5))
target := source.Copy()
// Apply changes
validators := target.Validators()
for i, shouldChange := range changes {
if i >= len(validators) {
break
}
if shouldChange {
validators[i].EffectiveBalance += 1000
}
}
_ = target.SetValidators(validators)
diff, err := Diff(source, target)
if err != nil {
return
}
result, err := ApplyDiff(t.Context(), source, diff)
if err != nil {
return
}
// Validator count should not decrease
require.Equal(t, true, len(result.Validators()) >= len(source.Validators()),
"Validator count decreased from %d to %d", len(source.Validators()), len(result.Validators()))
// Public keys should be preserved for existing validators
sourceVals := source.Validators()
resultVals := result.Validators()
for i := range sourceVals {
if i < len(resultVals) {
require.DeepEqual(t, sourceVals[i].PublicKey, resultVals[i].PublicKey,
"Public key changed at validator index %d", i)
}
}
})
}

View File

@@ -0,0 +1,392 @@
package hdiff
import (
"fmt"
"sync"
"testing"
"time"
"github.com/OffchainLabs/prysm/v6/testing/require"
"github.com/OffchainLabs/prysm/v6/testing/util"
)
// TestIntegerOverflowProtection tests protection against balance overflow attacks
func TestIntegerOverflowProtection(t *testing.T) {
source, _ := util.DeterministicGenesisStateElectra(t, 8)
// Test balance overflow in diffToBalances - use realistic values
t.Run("balance_diff_overflow", func(t *testing.T) {
target := source.Copy()
balances := target.Balances()
// Set high but realistic balance values (32 ETH in Gwei = 32e9)
balances[0] = 32000000000 // 32 ETH
balances[1] = 64000000000 // 64 ETH
_ = target.SetBalances(balances)
// This should work fine with realistic values
diffs, err := diffToBalances(source, target)
require.NoError(t, err)
// Verify the diffs are reasonable
require.Equal(t, true, len(diffs) > 0, "Should have balance diffs")
})
// Test reasonable balance changes
t.Run("realistic_balance_changes", func(t *testing.T) {
// Create realistic balance changes (slashing, rewards)
balancesDiff := []int64{1000000000, -500000000, 2000000000} // 1 ETH gain, 0.5 ETH loss, 2 ETH gain
// Apply to state with normal balances
testSource := source.Copy()
normalBalances := []uint64{32000000000, 32000000000, 32000000000} // 32 ETH each
_ = testSource.SetBalances(normalBalances)
// This should work fine
result, err := applyBalancesDiff(testSource, balancesDiff)
require.NoError(t, err)
resultBalances := result.Balances()
require.Equal(t, uint64(33000000000), resultBalances[0]) // 33 ETH
require.Equal(t, uint64(31500000000), resultBalances[1]) // 31.5 ETH
require.Equal(t, uint64(34000000000), resultBalances[2]) // 34 ETH
})
}
// TestReasonablePerformance tests that operations complete in reasonable time
func TestReasonablePerformance(t *testing.T) {
t.Run("large_state_performance", func(t *testing.T) {
// Test with a large but realistic validator set
source, _ := util.DeterministicGenesisStateElectra(t, 1000) // 1000 validators
target := source.Copy()
// Make realistic changes
_ = target.SetSlot(source.Slot() + 32) // One epoch
validators := target.Validators()
for i := 0; i < 100; i++ { // 10% of validators changed
validators[i].EffectiveBalance += 1000000000 // 1 ETH change
}
_ = target.SetValidators(validators)
// Should complete quickly
start := time.Now()
diff, err := Diff(source, target)
duration := time.Since(start)
require.NoError(t, err)
require.Equal(t, true, duration < time.Second, "Diff creation took too long: %v", duration)
require.Equal(t, true, len(diff.StateDiff) > 0, "Should have state diff")
})
t.Run("realistic_diff_application", func(t *testing.T) {
// Test applying diffs to large states
source, _ := util.DeterministicGenesisStateElectra(t, 500)
target := source.Copy()
_ = target.SetSlot(source.Slot() + 1)
// Create and apply diff
diff, err := Diff(source, target)
require.NoError(t, err)
start := time.Now()
result, err := ApplyDiff(t.Context(), source, diff)
duration := time.Since(start)
require.NoError(t, err)
require.Equal(t, target.Slot(), result.Slot())
require.Equal(t, true, duration < time.Second, "Diff application took too long: %v", duration)
})
}
// TestStateTransitionValidation tests realistic state transition scenarios
func TestStateTransitionValidation(t *testing.T) {
t.Run("validator_slashing_scenario", func(t *testing.T) {
source, _ := util.DeterministicGenesisStateElectra(t, 10)
target := source.Copy()
// Simulate validator slashing (realistic scenario)
validators := target.Validators()
validators[0].Slashed = true
validators[0].EffectiveBalance = 0 // Slashed validator loses balance
_ = target.SetValidators(validators)
// This should work fine
diff, err := Diff(source, target)
require.NoError(t, err)
result, err := ApplyDiff(t.Context(), source, diff)
require.NoError(t, err)
require.Equal(t, true, result.Validators()[0].Slashed)
require.Equal(t, uint64(0), result.Validators()[0].EffectiveBalance)
})
t.Run("epoch_transition_scenario", func(t *testing.T) {
source, _ := util.DeterministicGenesisStateElectra(t, 64)
target := source.Copy()
// Simulate epoch transition with multiple changes
_ = target.SetSlot(source.Slot() + 32) // One epoch
// Some validators get rewards, others get penalties
balances := target.Balances()
for i := 0; i < len(balances); i++ {
if i%2 == 0 {
balances[i] += 100000000 // 0.1 ETH reward
} else {
if balances[i] > 50000000 {
balances[i] -= 50000000 // 0.05 ETH penalty
}
}
}
_ = target.SetBalances(balances)
// This should work smoothly
diff, err := Diff(source, target)
require.NoError(t, err)
result, err := ApplyDiff(t.Context(), source, diff)
require.NoError(t, err)
require.Equal(t, target.Slot(), result.Slot())
})
t.Run("consistent_state_root", func(t *testing.T) {
// Test that diffs preserve state consistency
source, _ := util.DeterministicGenesisStateElectra(t, 32)
target := source.Copy()
// Make minimal changes
_ = target.SetSlot(source.Slot() + 1)
// Diff and apply should be consistent
diff, err := Diff(source, target)
require.NoError(t, err)
result, err := ApplyDiff(t.Context(), source, diff)
require.NoError(t, err)
// Result should match target
require.Equal(t, target.Slot(), result.Slot())
require.Equal(t, len(target.Validators()), len(result.Validators()))
require.Equal(t, len(target.Balances()), len(result.Balances()))
})
}
// TestSerializationRoundTrip tests serialization consistency
func TestSerializationRoundTrip(t *testing.T) {
t.Run("diff_serialization_consistency", func(t *testing.T) {
// Test that serialization and deserialization are consistent
source, _ := util.DeterministicGenesisStateElectra(t, 16)
target := source.Copy()
// Make changes
_ = target.SetSlot(source.Slot() + 5)
validators := target.Validators()
validators[0].EffectiveBalance += 1000000000
_ = target.SetValidators(validators)
// Create diff
diff1, err := Diff(source, target)
require.NoError(t, err)
// Deserialize and re-serialize
hdiff, err := newHdiff(diff1)
require.NoError(t, err)
diff2 := hdiff.serialize()
// Apply both diffs - should get same result
result1, err := ApplyDiff(t.Context(), source, diff1)
require.NoError(t, err)
result2, err := ApplyDiff(t.Context(), source, diff2)
require.NoError(t, err)
require.Equal(t, result1.Slot(), result2.Slot())
require.Equal(t, result1.Validators()[0].EffectiveBalance, result2.Validators()[0].EffectiveBalance)
})
t.Run("empty_diff_handling", func(t *testing.T) {
// Test that empty diffs are handled correctly
source, _ := util.DeterministicGenesisStateElectra(t, 8)
target := source.Copy() // No changes
// Should create minimal diff
diff, err := Diff(source, target)
require.NoError(t, err)
// Apply should work and return equivalent state
result, err := ApplyDiff(t.Context(), source, diff)
require.NoError(t, err)
require.Equal(t, source.Slot(), result.Slot())
require.Equal(t, len(source.Validators()), len(result.Validators()))
})
t.Run("compression_efficiency", func(t *testing.T) {
// Test that compression is working effectively
source, _ := util.DeterministicGenesisStateElectra(t, 100)
target := source.Copy()
// Make small changes
_ = target.SetSlot(source.Slot() + 1)
validators := target.Validators()
validators[0].EffectiveBalance += 1000000000
_ = target.SetValidators(validators)
// Create diff
diff, err := Diff(source, target)
require.NoError(t, err)
// Get full state size
fullStateSSZ, err := target.MarshalSSZ()
require.NoError(t, err)
// Diff should be much smaller than full state
diffSize := len(diff.StateDiff) + len(diff.ValidatorDiffs) + len(diff.BalancesDiff)
require.Equal(t, true, diffSize < len(fullStateSSZ)/2,
"Diff should be smaller than full state: diff=%d, full=%d", diffSize, len(fullStateSSZ))
})
}
// TestKMPSecurity tests the KMP algorithm for security issues
func TestKMPSecurity(t *testing.T) {
t.Run("nil_pointer_handling", func(t *testing.T) {
// Test with nil pointers in the pattern/text
pattern := []*int{nil, nil, nil}
text := []*int{nil, nil, nil, nil, nil}
equals := func(a, b *int) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
return *a == *b
}
// Should not panic - result can be any integer
result := kmpIndex(len(pattern), text, equals)
_ = result // Any result is valid, just ensure no panic
})
t.Run("empty_pattern_edge_case", func(t *testing.T) {
var pattern []*int
text := []*int{new(int), new(int)}
equals := func(a, b *int) bool { return a == b }
result := kmpIndex(0, text, equals)
require.Equal(t, 0, result, "Empty pattern should return 0")
_ = pattern // Silence unused variable warning
})
t.Run("realistic_pattern_performance", func(t *testing.T) {
// Test with realistic sizes to ensure good performance
realisticSize := 100 // More realistic for validator arrays
pattern := make([]*int, realisticSize)
text := make([]*int, realisticSize*2)
// Create realistic pattern
for i := range pattern {
val := i % 10 // More variation
pattern[i] = &val
}
for i := range text {
val := i % 10
text[i] = &val
}
equals := func(a, b *int) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
return *a == *b
}
start := time.Now()
result := kmpIndex(len(pattern), text, equals)
duration := time.Since(start)
// Should complete quickly with realistic inputs
require.Equal(t, true, duration < time.Second,
"KMP took too long: %v", duration)
_ = result // Any result is valid, just ensure performance is good
})
}
// TestConcurrencySafety tests thread safety of the hdiff operations
func TestConcurrencySafety(t *testing.T) {
t.Run("concurrent_diff_creation", func(t *testing.T) {
source, _ := util.DeterministicGenesisStateElectra(t, 32)
target := source.Copy()
_ = target.SetSlot(source.Slot() + 1)
const numGoroutines = 10
const iterations = 100
var wg sync.WaitGroup
errors := make(chan error, numGoroutines*iterations)
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for j := 0; j < iterations; j++ {
_, err := Diff(source, target)
if err != nil {
errors <- fmt.Errorf("worker %d iteration %d: %v", workerID, j, err)
}
}
}(i)
}
wg.Wait()
close(errors)
// Check for any errors
for err := range errors {
t.Error(err)
}
})
t.Run("concurrent_diff_application", func(t *testing.T) {
ctx := t.Context()
source, _ := util.DeterministicGenesisStateElectra(t, 16)
target := source.Copy()
_ = target.SetSlot(source.Slot() + 5)
diff, err := Diff(source, target)
require.NoError(t, err)
const numGoroutines = 10
var wg sync.WaitGroup
errors := make(chan error, numGoroutines)
for i := 0; i < numGoroutines; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
// Each goroutine needs its own copy of the source state
localSource := source.Copy()
_, err := ApplyDiff(ctx, localSource, diff)
if err != nil {
errors <- fmt.Errorf("worker %d: %v", workerID, err)
}
}(i)
}
wg.Wait()
close(errors)
// Check for any errors
for err := range errors {
t.Error(err)
}
})
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,399 @@
# State diffs in Prysm
The current document describes the implementation details and the design of hierarchical state diffs on Prysm. They follow the same design as [Lighthouse](https://github.com/dapplion/tree-states-review-guide/blob/main/persisted_hdiff.md) which in turn is an implementation of A. Nashatyrev's [design](https://hackmd.io/G82DNSdvR5Osw2kg565lBA).
Incremental state diffs can be used both for databases and memory representations of states. This document focuses on the state diffs necessary for the first usage. Prysm already handles memory deduplication of states with multi value slices, thus a diff mechanism would result in less impact.
## The basic design.
The idea is to diagram the cold-state database as a forest:
- Each tree in the forest is rooted by a full state snapshot, saved every λ_0 slots (think once a year).
- Each tree has the same height h. The root is unique and corresponds to the full snapshot, but on each level *1 ≤ i ≤ h*, there are β_i bifurcation nodes, which are stored every λ_i slots. Thus for example if we had *h = 2*, *λ_0 = 2^21*, *λ_1 = 2^18*, *λ_2 = 2^5*, we would have *β_1 = 7* and *β_2 = 8191* (notice that we subtract 1 since the first bifurcation node is just the state of the upper level). On the first level we would have 7 nodes written every ~36 days and on the second level we would have 8191 nodes written once every epoch.
- At each level *1 ≤ i ≤ h*, in the *β_i* nodes that are stored, instead of writing a full state snapshot, we store the diff between the state at that given slot and the state corresponding to the parent node in level *i-1*.
![database layout](./db_layout.png)
### Saving state diffs.
Let us assume that we have a running node that already has an hdiff compatible database. That is, some snapshot with a full state is saved at some slot `o` (for *offset*). Suppose that we have just updated finalization, thus we have some blocks that we may need to save a state diff (or even a snapshot) for. Suppose we try for a block with slot `c`. Then at each of the slots
o, o + λ_0, o + 2 λ_0, ..., o + k_0 λ_0
we have a full snapshot state saved. We assume that o + (k_0+1) λ_0 > c, so that our latest snapshot is in fact at slot o + k λ_0. Let us call this state *s_0*. At each of the slots
o + k_0 λ_0 + λ_1, o + k_0 λ_0 + 2 λ_1, ..., o + k_0 λ_0 + k_1 λ_1
we have stored a state diff between the state at that slot and *s_0*. We assume that
o + k_0 λ_0 + (k_1+1) λ_1 > c
so that the latest diff at level one is in fact at slot o + k_0 λ_0 + k_1 λ_1. Let us call the sate at that slot *s_1*. it is obtained by applying the state diff saved at that slot to the state *s_0*. Similarly at the next level, for each slot
o + k_0 λ_0 + k_1 λ_1 + λ_2, o + k_0 λ_0 + k_1 λ_1 + 2 λ_2, ..., o + k_0 λ_0 + k_1 λ_1 + k_2 λ_2
we have stored a state diff to the state *s_1*. We assume that
o + k_0 λ_0 + k_1 λ_1 + (k_2+1) λ_2 > c
so that the latest diff at level two is indeed at slot o + k_0 λ_0 + k_1 λ_1 + k_2 λ_2. Let us call the corresponding state *s_2*. It is obtained applying the last diff at level 2 to the state *s_1*, which in turn was obtained appplying a diff to the state *s_0*.
We continue until we have covered all of our levels up to level h. That is we have states *s_0*, *s_1*, ..., *s_{h}* and the last one is the state at slot
o + k_0 λ_0 + k_1 λ_1 + ... + k_h λ_h
So now we want to decide what do to with our state *t* at slot c. We act as follows. If o + k_0 λ_0 + k_1 λ_1 + ... + (k_h+1) λ_h > c. In this case we don't store anything. If on the other hand we have o + k_0 λ_0 + k_1 λ_1 + ... + (k_h+1) λ_h = c. In this case we will store either a state diff or an entire new snapshot. We proceed as follows.
If k_h < β_h, in this case we store a new state diff `Diff(s_{h-1},t)` at the slot c in level `h`.
If k_h = β_h, we check the next level. If k_{h-1} < β_{h-1}, then we store a new state diff `Diff(s_{h-2},t)` at level `h-1` at the slot `c`.
If k_{h-1} = β_{h-1} then we compare the next level: if k_{h-2} < β_{h-2}, then we store a new state diff `Diff(s_{h-3}, t)` at level `h-2` at the slot `c`.
We continue like this, if we reach the point in which all k_i = β_i for ì=1,...,h, then we store a new full snapshot with the state `t` at the slot `c`.
### Triggering storage
When we update finalization, we call `MigrateToCold`, this function, instead of calling the database to store a full state every few epochs (as we do today), will send the state `t` at slot `c` as in the previous section, to save the corresponding diff. The package that handles state saving internally is the `database` package. However, the function `MigrateToCold` is aware of the values of the offset *o* and the configuration constants λ_1, ..., λ_h so as to only send the states `t` for which `c` is of the form `o + k λ_h`.
### Database changes
The database exposes the following API to save states
```
SaveState(ctx context.Context, state state.ReadOnlyBeaconState, blockRoot [32]byte) error
```
This functions will change internally to save just the diff or a snapshot if appropriate. On the other hand, the following is the API to recover a state:
```go
HasState(ctx context.Context, blockRoot [32]byte) bool
State(ctx context.Context, blockRoot [32]byte) (state.BeaconState, error)
```
The first function can return true now in a static manner according to the slot of the corresponing `blockRoot`, it simply checks that it is of the form o + k λ_h. The second function can recover those states by applying the corresponding diffs.
Summarizing, the database has no changes in the exposed API, minimizing changes in the overal Prysm implementation, while the database internally changes the functions `State` and `SaveState` to use the `consensus-types/hdiff` package. This makes the serialization package fairly contained and only accessible from within the database package.
### Stategen changes
The `stategen` package is respondible for the migration to cold database, it exposes the function
```go
func (s *State) MigrateToCold(ctx context.Context, fRoot [32]byte) error {
```
that takes the finalized root and decides which states to save. This function is now changed to save only based on the slot of the state, for those slots that have the form o + k λ_h. A **warning** has to be said about missing blocks. Since the database will have to keep the state by slots now, a good approach in this function when there is a missing block at the corresponding slot, is to actually process the state to the right slot and save it already processed.
Another function that needs to change minimally is the function
```
func (s *State) StateByRoot(ctx context.Context, blockRoot [32]byte) (state.BeaconState, error)
```
That will get the ancestor from db simply by the slot rather than the root.
### Longer term changes
We could change the database API to include getters and setters by slot in the cold database, since anyway this will keep only canonical states this would make things easier at the stategen level.
### Configuration
We can make the constants h and λ_0, ... , λ_h user-configuratble. Thus, someone that is less storage constained and wants to run an archive RPC node, will set h higher and λ_h smaller (say 32 to save one diff every epoch), while a user that doesn't care about past states may even set `h=0` and not save anything.
### Database migration
There is no migration support expected.
### Startup from clean database
Starting up from a clean database and checkpoint sync will download the checkpoint state at slot o and set that slot as the offset in the database and save the first full snapshot with the checkpoint state.
Starting up from a clean database and from genesis will set o = 0 and start syncing from genesis as usual.
### Backfill
The following is added as an configurable option, pass the flag `--backfill-origin-state ssz`, in this case the node will download the state `ssz` and set as offset this state's slot. Will download the checkpoint state and start syncing forward as usual but will not call `MigrateToCold` until the backfill service is finished. In the background the node will download all blocks all the way up to the state ssz, then start forward syncing those blocks regenerating the finalized states and when they are of the form o + k λ_h. Once the forward syncing has caught up with the finalized checkpoint, we can start calling `MigrateToCold` again. This backfill mechanism is much faster than the current foward syncing to regenerate the states: we do not need to do any checks on the EL since the blocks are already finalized and trusted, the hashes are already confirmed.
### Database Prunning
Currently we have a flag `--pruner-retention-epochs` which will be deprecated. Instead, the pruning mechanism is simply the following, the user specifies how many snapshopts wants to keep (by default 0 means keep all snapshots). If the user say specifies `--pruner-retention-snapshots 1`, then the node will delete everything in the database everytime we save a new snapshot every λ_0 slots. So in particular, a user that wants to keep its database to a minimum, it will set h=0, λ_0 to a very large value, and pass 1 to this flag, thus the node will only keep one state at any time and will not update it.
## Implementation details.
This section contains actual implementation details of the feature. It will be populated as pull requests are being opened with the final details of the implementation. For a high level design document please refer to [this previous section](#the-basic-design).
### Serialization
The package `hdiff` located in `consensus-types/hdiff` is responsible for computing and applying state diffs between two different beacon states and serializing/deserializing them to/from a byte sequence.
#### Exported API
The only exported API consists of
```go
type HdiffBytes struct {
StateDiff []byte
ValidatorDiffs []byte
BalancesDiff []byte
}
func Diff(source, target state.ReadOnlyBeaconState) (HdiffBytes, error)
func ApplyDiff(ctx context.Context, source state.BeaconState, diff HdiffBytes) (state.BeaconState, error)
```
The structure `HdiffBytes` contains three different slices that can be handled independently by the caller (typically this will be database methods). These three slices are the serialized and Snappy compressed form of a state diff between two different states.
The function `Diff` takes two states and returns the serialized diff between them. The function `ApplyDiff` takes a state and a diff and returns the target state after having applied the diff to the source state.
#### The `hdiff` structure
When comparing a source state *s* and a target state *t*, before serializing, their difference is kept in a native structure `hdiff` which itself consist of three separate diffs.
```go
type hdiff struct {
stateDiff *stateDiff
validatorDiffs []validatorDiff
balancesDiff []int64
}
```
The `stateDiff` entry contains the bulk of the state diff, except the validator registry diff and the balance slice diff. These last two are separated to be able to store them separatedly. Often times, local RPC requests are for balances or validator status, and with the hierarchical strcutrure, we can reproduce them without regenerating the full state.
#### The `stateDiff` structure
This structure encodes the possible differences between two beacon states.
```go
type stateDiff struct {
targetVersion int
eth1VotesAppend bool
justificationBits byte
slot primitives.Slot
fork *ethpb.Fork
latestBlockHeader *ethpb.BeaconBlockHeader
blockRoots [fieldparams.BlockRootsLength][fieldparams.RootLength]byte
stateRoots [fieldparams.StateRootsLength][fieldparams.RootLength]byte
historicalRoots [][fieldparams.RootLength]byte
eth1Data *ethpb.Eth1Data
eth1DataVotes []*ethpb.Eth1Data
eth1DepositIndex uint64
randaoMixes [fieldparams.RandaoMixesLength][fieldparams.RootLength]byte
slashings [fieldparams.SlashingsLength]int64
previousEpochAttestations []*ethpb.PendingAttestation
currentEpochAttestations []*ethpb.PendingAttestation
previousJustifiedCheckpoint *ethpb.Checkpoint
currentJustifiedCheckpoint *ethpb.Checkpoint
finalizedCheckpoint *ethpb.Checkpoint
previousEpochParticipation []byte
currentEpochParticipation []byte
inactivityScores []uint64
currentSyncCommittee *ethpb.SyncCommittee
nextSyncCommittee *ethpb.SyncCommittee
executionPayloadHeader interfaces.ExecutionData
nextWithdrawalIndex uint64
nextWithdrawalValidatorIndex primitives.ValidatorIndex
historicalSummaries []*ethpb.HistoricalSummary
depositRequestsStartIndex uint64
depositBalanceToConsume primitives.Gwei
exitBalanceToConsume primitives.Gwei
earliestExitEpoch primitives.Epoch
consolidationBalanceToConsume primitives.Gwei
earliestConsolidationEpoch primitives.Epoch
pendingDepositIndex uint64
pendingPartialWithdrawalsIndex uint64
pendingConsolidationsIndex uint64
pendingDepositDiff []*ethpb.PendingDeposit
pendingPartialWithdrawalsDiff []*ethpb.PendingPartialWithdrawal
pendingConsolidationsDiffs []*ethpb.PendingConsolidation
proposerLookahead []uint64
}
```
This type is only used internally when serializing/deserializing and applying state diffs. We could in principle avoid double allocations and increase performance by avoiding entirely having a native type and working directly with the serialized bytes. The tradeoff is readability of the serialization functions.
#### The `validatorDiff` structure
This structure is similar to the `stateDiff` one, it is only used internally in the `hdiff` package in `consensus-types`
```go
type validatorDiff struct {
Slashed bool
index uint32
PublicKey []byte
WithdrawalCredentials []byte
EffectiveBalance uint64
ActivationEligibilityEpoch primitives.Epoch
ActivationEpoch primitives.Epoch
ExitEpoch primitives.Epoch
WithdrawableEpoch primitives.Epoch
}
```
#### The `balancesDiff` slice
Given a source state `s` and a target state `t` assumed to be newer than `s`, so that the length of `t.balances` is greater or equal than that of `s.balances`. Then the `balancesDiff` slice inside the `hdiff` structure is computed simply as the algebraic difference, it's *i-th* entry is given by `t.balances[i] - s.balances[i]` where the second term is considered as zero if `i ≥ len(s.balances)`.
#### Deserializing with `newHdiff`
The function
```go
func newHdiff(data HdiffBytes) (*hdiff, error)
```
takes a serialized diff and produces the native internal type `hdiff`. This function encodes the internal logic for deserialization. It internally calls the functions ` newStateDiff`, `newValidatorDiffs` and `newBalancesDiff` to obtain the three inner structures.
The main deserialization routines take the byte slices and they first decompress them with `snappy.Decode`. They create an empty `stateDiff`, `validatorDiff` or `balancesDiff` object `ret` and after that they pass a pointer to the decompressed byte slice `data` to helper functions `ret.readXXX(&data)` that populate each of the entries of `ret`. Here `XXX` corresponds to each of the entries in the beacon state, like `fork`, `slot`, etc. Each one of the helpers receives a pointer to the `data` slice that contains the byte slice of the diff that **is still yet to be deserialized**. The helper populates the corresponding entry in the hdiff structure and then modifies the `data` slice to drop the deserialized bytes. That is, each helper receives a slice that needs to be deserialized since its first byte.
The following list documents the method that is used for serialization/deserialization of each entry
##### Version
The version is stored as a little endian `uint64` in fixed 8 bytes of `data`. This version is the target version, that is, we override whatever the source state version is, with this target version.
##### Slot
The slot is treated exactly the same as the version entry.
##### Fork
The fork is deserialized as follows. If the first byte of `data` is zero (a constant called `nilMarker` in the package) then the fork pointer is `nil` in the `hdiff` struture. If the first byte of `data` is not zero then the remaining bytes deserialize to a full `Fork` object.
When applying the diff, if the fork pointer is `nil` then the source's Fork is not changed, while if it is not-nil, then the source's Fork is changed to whatever the `hdiff` pointer is.
##### Latest Block Header
The latest Block header is treated exactly like the Fork pointer.
##### Block Roots
The block roots slice is deserialized literally as a full slice of beacon block roots, this may seem like a large waste of memory and space since this slice is 8192 roots, each 32 bytes. However, the serialization process is as follows, if a blockroot has not changed between the source and the target state, we store a full zero root `0x00...`. For states that are *close by*, the block roots slice will not have changed much, this will produce a slice that is mostly zeroes, and these gets stored occupying minimal space with Snappy compression. When two states are more than 8192 slots appart, the target block roots slice will have to be saved in its entirety, which is what this method achieves.
We could get a little more performance here if instead of keeping a full zeroed out root in the internal `hdiff` structure, we stored an empty slice. But this way the check for lengths becomes slightly more complicated.
##### State Roots
The state roots slice is treated exactly like the block roots one.
##### Historical Roots
The historical roots slice diff is stored as follows, the first 8 bytes store a little endian `uint64` that determines the length of the slice. After this, the following bytes contain as many 32 byte roots as this length indicates. Again, as in the previous root slices, if the root is not to be changed from the source state, we store a zero root.
##### Eth1 Data
The Eth1 Data diff object is treated exactly like the fork object.
##### Eth1 Data Votes
The `stateDiff` structure has two fields related to Eth1 data votes. The boolean entry `eth1VotesAppend` and a slice `eth1DataVotes`. The boolean indicates if the slice is to be *appended* to the source target or if the eth1 data vote slice needs to be completely replaced with the slice in the diff.
Deserialization then goes as follows, if the first byte is `nilMarker` then `eth1VotesAppend` is set to `True`, and `False` otherwise. The following 8 bytes contain a `uint64` serialization of the length of the slice. The remaining bytes contain the serialized slice.
##### Eth1 Deposit Index
This field always overrides the source's value. It is stored as an 8 bytes serialized `uint64`.
##### Randao Mixes
This field is treated exactly like the block roots slice.
##### Slashings
The slashings slice is stored as the algebraic difference between the target and the source state `t.slashings - s.slashings`. Thus the data is read as a sequence of 8 bytes serialized little Endian `int64`. When applying this diff to a source state, we add this number to the source state's slashings. This way, numbers are kept small and they snappy compress better.
##### Pending Attestations
Pending attestations are only present in Phase 0 states. So the paths to deserialize them (both for *previous and current epoch attestations*) is only executed in case the target state is a Phase 0 state (notice that this implies that the source state must have been a Phase0 state as well).
For both of these slices we store first the length in the first 8 bytes. Then we loop over the remaining bytes deserializing each pending attestation. Each of them is of variable size and is deserialized as follows, the first 8 bytes contain the attestation aggregation bits length. The next bytes (how many is determined by the aggregation bits length) encode the aggregation bits. The next 128 bytes are the SSZ encoded attestation data. Finally the inclusion delay and the proposer index are serialized as 8 bytes `uint64`.
##### Previous and Current epoch participation
These slices are there post Altair. They are serialized as follows, the first 8 bytes contain the length, and the remaining bytes (indicated by the length) are just stored directly as a byte slice.
##### Justification Bits
These are stored as a single byte and they always override the value of the source state with this byte stored in the `hdiff` structure.
##### Finalized and Previous/Current justified Checkpoints
These are stored as SSZ serialized checkpoints.
##### Inactivity Scores
The first 8 bytes contain the little Endian encoded length, and the remaining bytes contain the `uint64` serialized slice.
##### Current and Next Sync committees
If the first byte is 0, then the sync committee is set to be nil (and therefore the source's sync committee is not changed). Otherwise the remaining bytes contain the SSZ serialized sync committee.
##### Execution Payload Header
This is serialized exactly like the sync committes. Notice that the implementation of `readExecutionPayloadHeader` is more involved because the SSZ serialization of the header depends on the state's version.
##### Withdrawal Indices
The fields `nextWithdrawalIndex` and `nextWithdrawalValidatorIndex` are treated just like the `Slot` field.
##### Historical Summaries
The first 8 bytes store the length of the list and the remaining bytes are stored as SSZ serializations of the summary entry. This slice is **appended** to the source state's historical summary state.
##### Electra requests indices
The fields `depositRequestsStartIndex`, `depositBalanceToConsume`, `exitBalanceToConsume`, `earliestExitEpoch`, `consolidationBalanceToConsume` and `earliestConsolidationEpoch` are stored like the `Slot` field.
##### Pending Deposits
The first 8 bytes store the `pendingDepositIndex`, the next 8 bytes store the length of the pending deposit diff slice. The remaining bytes store a slice of SSZ serialized `PendingDeposit` objects.
This diff slice is different than others, we store the extra index `pendingDepositIndex` in the `hdiff` structure that is used as follows. This index indicates how many pending deposits need to be dropped from the source state. The remaining slice is added to the end of the source state's pending deposits. The rationale for this serialization algorithm is that if taking the diff of two close enough states, the pending deposit queue may be very large. Between the source and the target, the first few deposits may have already been consumed, but the remaining large majority would still be there in the target. The target state may have some more extra deposits to be added in the end.
Similarly, when computing the diff between the source and the target state, we need to find the index of the first deposit in common. We use the [Knuth-Morris-Pratt](https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm) algorith to find it.
Suppose that the source pending deposits are
```
[A, B, C, D, E, F, G, H]
```
And the target pending deposits are
```
[C, D, E, F, G, H, I, J, K]
```
Then we will store `pendingDepositIndex = 2` and the diff slice will be
```
[I, J, K]
```
##### Pending Partial Withdrawals
This field is treated exactly like the pending deposits.
##### Pending Consolidations
This field is treated exactly like the pending deposits.
##### Proposer Lookahead
The proposer lookahead is stored as the SSZ serialized version of the field. It always overrides the source's field.
#### Applying a diff
The exported function
```go
func ApplyDiff(ctx context.Context, source state.BeaconState, diff HdiffBytes) (state.BeaconState, error)
```
Takes care of applying the diff, it first calls `newHdiff` to convert the raw bytes in `diff` into an internal `hdiff` structure, and then it modifies the `source` state as explained above returning the modified state.
#### Computing a Diff
The exported function
```go
func Diff(source, target state.ReadOnlyBeaconState) (HdiffBytes, error)
```
Takes two states and returns the corresponding diff bytes. This function calls the function `diffInternal` which in turn calls `diffToState`, `diffToVals` and `diffToBalances` that each return the corresponding component of an internal `hdiff` structure. Then we call `serialize()` on the correponding `hdiff` structure. The function `serialize` constructs the `data` byte slice as described above in the [Deserialization](#deserialization) section and finally it calls `snappy.Encode()` on each of the three slices.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,5 @@
go test fuzz v1
byte('\x00')
uint64(0)
[]byte("0")
[]byte("")

View File

@@ -0,0 +1,2 @@
go test fuzz v1
[]byte("0")

View File

@@ -0,0 +1,2 @@
go test fuzz v1
[]byte("0000000\xff")