Compare commits

...

2 Commits

Author SHA1 Message Date
Georgios Konstantopoulos
fd8bbf3dbc perf: integrate smart chunking for multiproof targets
Use SmartChunkedMultiProofTargets in the engine's payload processor to
respect account boundaries when chunking proof targets. This prevents
double-seeking when processing proofs for accounts with many storage slots.

Amp-Thread-ID: https://ampcode.com/threads/T-019bfbbc-0a06-7449-afe2-c517f9892319
Co-authored-by: Amp <amp@ampcode.com>
2026-01-26 19:31:41 +00:00
Andrurachi
e4830fbab0 perf: optimize proof chunking to respect account boundaries
closes #18036
2026-01-26 19:17:12 +00:00
6 changed files with 282 additions and 8 deletions

4
Cargo.lock generated
View File

@@ -11135,8 +11135,12 @@ dependencies = [
"plain_hasher",
"proptest",
"proptest-arbitrary-interop",
"rand 0.9.2",
"rayon",
"reth-codecs",
"reth-db",
"reth-db-api",
"reth-primitives",
"reth-primitives-traits",
"revm-database",
"revm-state",

View File

@@ -362,11 +362,12 @@ impl VersionedMultiProofTargets {
}
/// Chunks this `VersionedMultiProofTargets` into smaller chunks of the given size.
///
/// Uses smart chunking for legacy targets to respect account boundaries and prevent
/// double-seeking when processing proofs.
fn chunks(self, chunk_size: usize) -> Box<dyn Iterator<Item = Self>> {
match self {
Self::Legacy(targets) => {
Box::new(MultiProofTargets::chunks(targets, chunk_size).map(Self::Legacy))
}
Self::Legacy(targets) => Box::new(targets.smart_chunks(chunk_size).map(Self::Legacy)),
Self::V2(targets) => {
Box::new(ChunkedMultiProofTargetsV2::new(targets, chunk_size).map(Self::V2))
}

View File

@@ -46,6 +46,11 @@ arbitrary = { workspace = true, features = ["derive"], optional = true }
rayon = { workspace = true, optional = true }
[dev-dependencies]
reth-db = { workspace = true, features = ["test-utils", "mdbx"] }
reth-db-api = { workspace = true }
reth-primitives = { workspace = true }
rand = { workspace = true }
reth-primitives-traits = { workspace = true, features = ["serde"] }
reth-codecs.workspace = true
alloy-genesis.workspace = true
@@ -141,3 +146,7 @@ harness = false
name = "hashed_state"
harness = false
required-features = ["rayon"]
[[bench]]
name = "smart_chunking"
harness = false

View File

@@ -0,0 +1,152 @@
#![allow(missing_docs, unreachable_pub)]
use alloy_primitives::{keccak256, map::HashSet, Address, B256, U256};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::{rngs::StdRng, Rng, SeedableRng};
use reth_db::{
cursor::DbDupCursorRO,
tables,
test_utils::create_test_rw_db,
transaction::{DbTx, DbTxMut},
Database, DatabaseEnv,
};
use reth_primitives::StorageEntry;
use reth_trie_common::proofs::{
ChunkedMultiProofTargets, MultiProofTargets, SmartChunkedMultiProofTargets,
};
use std::sync::Arc;
// Constant chunk size
const CHUNK_SIZE: usize = 60;
// fn seed_fragmentation_db(num_accounts: usize, slots_per_account: usize) ->
// (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) { let db =
// create_test_rw_db(); let tx = db.tx_mut().expect("failed to create rw tx");
// let mut targets = MultiProofTargets::default();
// for i in 0..num_accounts {
// let address = Address::from_word(B256::from(U256::from(i)));
// let hashed_address = keccak256(address);
// let mut slot_keys = HashSet::default();
// for j in 0..slots_per_account {
// let slot_key = B256::from(U256::from(j));
// let hashed_slot = keccak256(slot_key);
// let value = U256::from(1);
// tx.put::<tables::HashedStorages>(
// hashed_address,
// StorageEntry { key: hashed_slot, value }
// ).expect("failed to insert");
// slot_keys.insert(hashed_slot);
// }
// targets.0.insert(hashed_address, slot_keys);
// }
// tx.commit().expect("failed to commit");
// (db, targets)
// }
/// Seeds a temporary test database with `num_accounts` accounts and returns it together with
/// proof targets covering every inserted storage slot.
///
/// The number of slots per account is drawn from a fixed-seed RNG (seed `42`), so repeated runs
/// produce the same data set:
///
/// - 80% of accounts get 1-5 slots
/// - 15% of accounts get 10-50 slots
/// - 5% of accounts get 100-300 slots
///
/// Every slot is written to `tables::HashedStorages` under the keccak hash of the account
/// address, keyed by the keccak hash of the slot index, with a value of `1`.
fn seed_realistic_db(
    num_accounts: usize,
) -> (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) {
    let db = create_test_rw_db();
    let rw_tx = db.tx_mut().expect("failed to create rw tx");

    let mut targets = MultiProofTargets::default();
    let mut rng = StdRng::seed_from_u64(42);

    for account_index in 0..num_accounts {
        let hashed_address = keccak256(Address::from_word(B256::from(U256::from(account_index))));

        // First roll picks the size bucket, second roll picks the size inside that bucket.
        let slots_count = match rng.random_range(0..100) {
            roll if roll < 80 => rng.random_range(1..=5),
            roll if roll < 95 => rng.random_range(10..=50),
            _ => rng.random_range(100..=300),
        };

        let mut slot_keys = HashSet::default();
        for slot_index in 0..slots_count {
            let hashed_slot = keccak256(B256::from(U256::from(slot_index)));
            let entry = StorageEntry { key: hashed_slot, value: U256::from(1) };
            rw_tx.put::<tables::HashedStorages>(hashed_address, entry).expect("failed to insert");
            slot_keys.insert(hashed_slot);
        }

        targets.0.insert(hashed_address, slot_keys);
    }

    rw_tx.commit().expect("failed to commit");
    (db, targets)
}
/// Simulates the storage access pattern of a proof fetch for the given chunks.
///
/// A fresh `HashedStorages` cursor is opened for every chunk, and one seek is issued per account
/// in the chunk. When the seek finds an entry, the account's slots are passed through
/// [`black_box`] so the optimizer cannot discard the iteration.
fn execute_proof_fetch(
    db: &Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>,
    chunks: Vec<MultiProofTargets>,
) {
    let ro_tx = db.tx().expect("ro tx");

    for targets in chunks {
        let mut storage_cursor = ro_tx.cursor_read::<tables::HashedStorages>().expect("cursor");

        for (hashed_account, slot_set) in targets.0 {
            // One seek per (chunk, account) pair: this is the cost the chunking strategy affects.
            let found =
                storage_cursor.seek_by_key_subkey(hashed_account, B256::ZERO).expect("seek");
            if found.is_none() {
                continue;
            }

            slot_set.into_iter().for_each(|slot| {
                black_box(slot);
            });
        }
    }
}
/// Benchmarks the original flat chunking against the account-boundary-aware chunking.
///
/// Both variants chunk a clone of the same 1000-account target set with [`CHUNK_SIZE`] and then
/// replay the resulting chunks against the seeded database via `execute_proof_fetch`.
fn bench_chunking_strategies(c: &mut Criterion) {
    let mut group = c.benchmark_group("Smart_Chunking_Experiment");

    let (db, targets) = seed_realistic_db(1000);

    group.bench_function("old_chunking_logic", |bencher| {
        bencher.iter(|| {
            let chunks =
                ChunkedMultiProofTargets::new(targets.clone(), CHUNK_SIZE).collect::<Vec<_>>();
            execute_proof_fetch(&db, chunks);
        })
    });

    group.bench_function("smart_chunking_logic", |bencher| {
        bencher.iter(|| {
            let chunks =
                SmartChunkedMultiProofTargets::new(targets.clone(), CHUNK_SIZE).collect::<Vec<_>>();
            execute_proof_fetch(&db, chunks);
        })
    });

    group.finish();
}
criterion_group!(benches, bench_chunking_strategies);
criterion_main!(benches);

View File

@@ -52,7 +52,7 @@ pub use trie::{BranchNodeMasks, BranchNodeMasksMap, ProofTrieNode};
/// The container indicates when the trie has been modified.
pub mod prefix_set;
mod proofs;
pub mod proofs;
#[cfg(any(test, feature = "test-utils"))]
pub use proofs::triehash;
pub use proofs::*;

View File

@@ -1,7 +1,10 @@
//! Merkle trie proofs.
#[allow(missing_docs)]
use crate::{BranchNodeMasksMap, Nibbles, ProofTrieNode, TrieAccount};
use alloc::{borrow::Cow, vec::Vec};
use alloc::{
borrow::Cow,
vec::{IntoIter, Vec},
};
use alloy_consensus::constants::KECCAK_EMPTY;
use alloy_primitives::{
keccak256,
@@ -14,13 +17,14 @@ use alloy_trie::{
proof::{verify_proof, DecodedProofNodes, ProofNodes, ProofVerificationError},
EMPTY_ROOT_HASH,
};
use core::iter::Peekable;
use derive_more::{Deref, DerefMut, IntoIterator};
use itertools::Itertools;
use reth_primitives_traits::Account;
/// Proof targets map.
#[derive(Deref, DerefMut, IntoIterator, Clone, PartialEq, Eq, Default, Debug)]
pub struct MultiProofTargets(B256Map<B256Set>);
pub struct MultiProofTargets(pub B256Map<B256Set>);
impl FromIterator<(B256, B256Set)> for MultiProofTargets {
fn from_iter<T: IntoIterator<Item = (B256, B256Set)>>(iter: T) -> Self {
@@ -90,6 +94,13 @@ impl MultiProofTargets {
ChunkedMultiProofTargets::new(self, size)
}
/// Returns an iterator that yields chunks respecting account boundaries.
///
/// Consumes `self` and hands it, together with `size`, to
/// [`SmartChunkedMultiProofTargets::new`]. `size` is the per-chunk limit, counted in storage
/// slots; an account without slots counts as one.
///
/// See [`SmartChunkedMultiProofTargets`] for more information.
pub fn smart_chunks(self, size: usize) -> SmartChunkedMultiProofTargets {
SmartChunkedMultiProofTargets::new(self, size)
}
/// Returns the number of items that will be considered during chunking in [`Self::chunks`].
pub fn chunking_length(&self) -> usize {
self.values().map(|slots| 1 + slots.len().saturating_sub(1)).sum::<usize>()
@@ -123,7 +134,7 @@ pub struct ChunkedMultiProofTargets {
}
impl ChunkedMultiProofTargets {
fn new(targets: MultiProofTargets, size: usize) -> Self {
pub fn new(targets: MultiProofTargets, size: usize) -> Self {
let flattened_targets = targets
.into_iter()
.flat_map(|(address, slots)| {
@@ -166,6 +177,103 @@ impl Iterator for ChunkedMultiProofTargets {
}
}
/// Iterator that splits [`MultiProofTargets`] into chunks while respecting account boundaries
/// and enforcing a per-chunk size limit.
///
/// The size of a chunk is counted in storage slots; an account without any slots counts as one.
///
/// - Accounts that fit into the remaining capacity of a chunk are never split (preventing
///   double-seeking).
/// - An account that is larger than a whole chunk is split across several chunks. Every piece
///   except the last holds exactly `chunk_size` slots; the last piece may share its chunk with
///   the accounts that follow.
#[derive(Debug)]
pub struct SmartChunkedMultiProofTargets {
/// Accounts that have not been emitted yet, in the order fixed by the constructor.
accounts: Peekable<IntoIter<(B256, B256Set)>>,
/// Remainder of an account that was too big for a single chunk; it is consumed first on the
/// next call to `next`.
///
/// Stored as `(account key, sorted list of the slots not yet emitted)`.
pending_large_account: Option<(B256, Vec<B256>)>,
/// Chunk size limit.
chunk_size: usize,
}
impl SmartChunkedMultiProofTargets {
    /// Creates a chunking iterator over `targets` with a per-chunk limit of `size`.
    ///
    /// The accounts are sorted by key up front so that chunks are produced in a deterministic
    /// order regardless of the iteration order of the underlying map.
    pub fn new(targets: MultiProofTargets, size: usize) -> Self {
        let mut ordered = targets.0.into_iter().collect::<Vec<(B256, B256Set)>>();
        // Keys come from a map and are therefore unique, so an unstable sort is sufficient.
        ordered.sort_unstable_by_key(|entry| entry.0);

        Self {
            chunk_size: size,
            pending_large_account: None,
            accounts: ordered.into_iter().peekable(),
        }
    }
}
impl Iterator for SmartChunkedMultiProofTargets {
    type Item = MultiProofTargets;

    /// Produces the next chunk, or `None` once every target has been emitted.
    ///
    /// A chunk is filled in this order:
    ///
    /// 1. The remainder of a previously split large account, if any. If that remainder is still
    ///    larger than the limit, one full slice of it is returned on its own.
    /// 2. Whole accounts, for as long as they fit into the remaining capacity. An account
    ///    without slots costs one unit.
    /// 3. If the chunk is still empty and the next account alone exceeds the limit, the account
    ///    is split: its slots are sorted, the first slice is returned, and the rest is parked
    ///    for the following calls.
    ///
    /// A configured chunk size of `0` is treated as `1`. With a literal zero limit every split
    /// would take zero slots, the pending account would never shrink, and the iterator would
    /// yield empty chunks forever.
    fn next(&mut self) -> Option<Self::Item> {
        // Effective limit; see the note on zero in the method docs.
        let limit = self.chunk_size.max(1);

        let mut current_chunk = MultiProofTargets::default();
        let mut current_load = 0;

        // Handle leftovers from a previously split large account.
        if let Some((addr, mut remaining_slots)) = self.pending_large_account.take() {
            if remaining_slots.len() > limit {
                // Still does not fit: emit one full slice and park the rest again.
                current_chunk.0.insert(addr, remaining_slots.drain(..limit).collect());
                self.pending_large_account = Some((addr, remaining_slots));
                return Some(current_chunk);
            }

            // The tail fits; keep filling the chunk with the accounts that follow.
            current_load += remaining_slots.len();
            current_chunk.0.insert(addr, remaining_slots.into_iter().collect());
        }

        // Process whole accounts.
        while let Some((_, slots)) = self.accounts.peek() {
            // An account without slots still occupies one unit of the chunk.
            let account_cost = slots.len().max(1);

            if current_load + account_cost > limit {
                if current_load > 0 {
                    // The chunk has data and the account does not fit: close the chunk and leave
                    // the account for the next call.
                    return Some(current_chunk);
                }

                // The chunk is empty, so the account alone exceeds the limit and must be split.
                // `account_cost > limit >= 1` implies the account has more than `limit` slots,
                // so the `drain(..limit)` below is in bounds and leaves a non-empty remainder.
                let (addr, slots_set) =
                    self.accounts.next().expect("entry was just observed via peek");

                // Sort the slots so the split is deterministic.
                let mut sorted_slots: Vec<B256> = slots_set.into_iter().collect();
                sorted_slots.sort_unstable();

                current_chunk.0.insert(addr, sorted_slots.drain(..limit).collect());
                self.pending_large_account = Some((addr, sorted_slots));
                return Some(current_chunk);
            }

            // It fits: consume the account as a whole.
            let (addr, slots) = self.accounts.next().expect("entry was just observed via peek");
            current_load += account_cost;
            current_chunk.0.insert(addr, slots);
        }

        if current_chunk.0.is_empty() {
            None
        } else {
            Some(current_chunk)
        }
    }
}
/// The state multiproof of target accounts and multiproofs of their storage tries.
/// Multiproof is effectively a state subtrie that only contains the nodes
/// in the paths of target accounts.