perf: integrate smart chunking for multiproof targets

Use SmartChunkedMultiProofTargets in the engine's payload processor to respect account boundaries when chunking proof targets. This prevents double-seeking when processing proofs for accounts with many storage slots.

Amp-Thread-ID: https://ampcode.com/threads/T-019bfbbc-0a06-7449-afe2-c517f9892319
Co-authored-by: Amp <amp@ampcode.com>
perf: optimize proof chunking to respect account boundaries
2026-04-30 03:01:58 -04:00 · 2026-01-26 19:31:41 +00:00 · 2026-01-26 19:17:12 +00:00
6 changed files with 282 additions and 8 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -11135,8 +11135,12 @@ dependencies = [
 "plain_hasher",
 "proptest",
 "proptest-arbitrary-interop",
+ "rand 0.9.2",
 "rayon",
 "reth-codecs",
+ "reth-db",
+ "reth-db-api",
+ "reth-primitives",
 "reth-primitives-traits",
 "revm-database",
 "revm-state",
--- a/crates/engine/tree/src/tree/payload_processor/multiproof.rs
+++ b/crates/engine/tree/src/tree/payload_processor/multiproof.rs
@@ -362,11 +362,12 @@ impl VersionedMultiProofTargets {
    }

    /// Chunks this `VersionedMultiProofTargets` into smaller chunks of the given size.
+    ///
+    /// Uses smart chunking for legacy targets to respect account boundaries and prevent
+    /// double-seeking when processing proofs.
    fn chunks(self, chunk_size: usize) -> Box<dyn Iterator<Item = Self>> {
        match self {
-            Self::Legacy(targets) => {
-                Box::new(MultiProofTargets::chunks(targets, chunk_size).map(Self::Legacy))
-            }
+            Self::Legacy(targets) => Box::new(targets.smart_chunks(chunk_size).map(Self::Legacy)),
            Self::V2(targets) => {
                Box::new(ChunkedMultiProofTargetsV2::new(targets, chunk_size).map(Self::V2))
            }
--- a/crates/trie/common/Cargo.toml
+++ b/crates/trie/common/Cargo.toml
@@ -46,6 +46,11 @@ arbitrary = { workspace = true, features = ["derive"], optional = true }
 rayon = { workspace = true, optional = true }

 [dev-dependencies]
+reth-db = { workspace = true, features = ["test-utils", "mdbx"] }
+reth-db-api = { workspace = true }
+reth-primitives = { workspace = true }
+rand = { workspace = true }
+
 reth-primitives-traits = { workspace = true, features = ["serde"] }
 reth-codecs.workspace = true
 alloy-genesis.workspace = true
@@ -141,3 +146,7 @@ harness = false
 name = "hashed_state"
 harness = false
 required-features = ["rayon"]
+
+[[bench]]
+name = "smart_chunking"
+harness = false
--- a/crates/trie/common/benches/smart_chunking.rs
+++ b/crates/trie/common/benches/smart_chunking.rs
@@ -0,0 +1,152 @@
+#![allow(missing_docs, unreachable_pub)]
+
+use alloy_primitives::{keccak256, map::HashSet, Address, B256, U256};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use reth_db::{
+    cursor::DbDupCursorRO,
+    tables,
+    test_utils::create_test_rw_db,
+    transaction::{DbTx, DbTxMut},
+    Database, DatabaseEnv,
+};
+use reth_primitives::StorageEntry;
+use reth_trie_common::proofs::{
+    ChunkedMultiProofTargets, MultiProofTargets, SmartChunkedMultiProofTargets,
+};
+use std::sync::Arc;
+
+// Constant chunk size
+const CHUNK_SIZE: usize = 60;
+
+// fn seed_fragmentation_db(num_accounts: usize, slots_per_account: usize) ->
+// (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) {     let db =
+// create_test_rw_db();     let tx = db.tx_mut().expect("failed to create rw tx");
+//     let mut targets = MultiProofTargets::default();
+
+//     for i in 0..num_accounts {
+//         let address = Address::from_word(B256::from(U256::from(i)));
+//         let hashed_address = keccak256(address);
+
+//         let mut slot_keys = HashSet::default();
+
+//         for j in 0..slots_per_account {
+//             let slot_key = B256::from(U256::from(j));
+//             let hashed_slot = keccak256(slot_key);
+//             let value = U256::from(1);
+
+//             tx.put::<tables::HashedStorages>(
+//                 hashed_address,
+//                 StorageEntry { key: hashed_slot, value }
+//             ).expect("failed to insert");
+
+//             slot_keys.insert(hashed_slot);
+//         }
+
+//         targets.0.insert(hashed_address, slot_keys);
+//     }
+
+//     tx.commit().expect("failed to commit");
+//     (db, targets)
+// }
+
+/// Seeds a temporary test database with `num_accounts` accounts and returns it together with
+/// proof targets covering every inserted storage slot.
+///
+/// Slot counts are drawn from an RNG with a fixed seed, so each run produces the same data:
+/// - 80% of accounts get 1-5 slots
+/// - 15% of accounts get 10-50 slots
+/// - 5% of accounts get 100-300 slots
+fn seed_realistic_db(
+    num_accounts: usize,
+) -> (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) {
+    let db = create_test_rw_db();
+    let tx = db.tx_mut().expect("failed to create rw tx");
+    let mut rng = StdRng::seed_from_u64(42);
+    let mut targets = MultiProofTargets::default();
+
+    for account_index in 0..num_accounts {
+        let hashed_address =
+            keccak256(Address::from_word(B256::from(U256::from(account_index))));
+
+        // The first draw selects the size bucket, the second the slot count within it.
+        let slots_count = match rng.random_range(0..100) {
+            0..=79 => rng.random_range(1..=5),
+            80..=94 => rng.random_range(10..=50),
+            _ => rng.random_range(100..=300),
+        };
+
+        let mut slot_keys = HashSet::default();
+        for slot_index in 0..slots_count {
+            let hashed_slot = keccak256(B256::from(U256::from(slot_index)));
+            let entry = StorageEntry { key: hashed_slot, value: U256::from(1) };
+
+            tx.put::<tables::HashedStorages>(hashed_address, entry).expect("failed to insert");
+            slot_keys.insert(hashed_slot);
+        }
+
+        targets.0.insert(hashed_address, slot_keys);
+    }
+
+    tx.commit().expect("failed to commit");
+    (db, targets)
+}
+
+/// Replays the storage seeks implied by `chunks` against `db`.
+///
+/// A fresh `HashedStorages` cursor is opened for every chunk and positioned once per account in
+/// that chunk. When the seek finds an entry, the account's slots are passed through `black_box`.
+fn execute_proof_fetch(
+    db: &Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>,
+    chunks: Vec<MultiProofTargets>,
+) {
+    let tx = db.tx().expect("ro tx");
+
+    for chunk in chunks {
+        let mut cursor = tx.cursor_read::<tables::HashedStorages>().expect("cursor");
+
+        for (hashed_addr, slots) in chunk.0 {
+            // One seek per account per chunk.
+            let found = cursor.seek_by_key_subkey(hashed_addr, B256::ZERO).expect("seek");
+            if found.is_none() {
+                continue;
+            }
+
+            slots.into_iter().for_each(|slot| {
+                black_box(slot);
+            });
+        }
+    }
+}
+
+/// Benchmarks the flat chunker against the account-aware chunker on the same seeded data set.
+///
+/// Each iteration clones the targets, chunks them with `CHUNK_SIZE` and replays the resulting
+/// seeks through `execute_proof_fetch`.
+fn bench_chunking_strategies(c: &mut Criterion) {
+    let (db, targets) = seed_realistic_db(1000);
+    let mut group = c.benchmark_group("Smart_Chunking_Experiment");
+
+    group.bench_function("old_chunking_logic", |b| {
+        b.iter(|| {
+            execute_proof_fetch(
+                &db,
+                ChunkedMultiProofTargets::new(targets.clone(), CHUNK_SIZE).collect::<Vec<_>>(),
+            );
+        })
+    });
+
+    group.bench_function("smart_chunking_logic", |b| {
+        b.iter(|| {
+            execute_proof_fetch(
+                &db,
+                SmartChunkedMultiProofTargets::new(targets.clone(), CHUNK_SIZE)
+                    .collect::<Vec<_>>(),
+            );
+        })
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_chunking_strategies);
+criterion_main!(benches);
--- a/crates/trie/common/src/lib.rs
+++ b/crates/trie/common/src/lib.rs
@@ -52,7 +52,7 @@ pub use trie::{BranchNodeMasks, BranchNodeMasksMap, ProofTrieNode};
 /// The container indicates when the trie has been modified.
 pub mod prefix_set;

-mod proofs;
+pub mod proofs;
 #[cfg(any(test, feature = "test-utils"))]
 pub use proofs::triehash;
 pub use proofs::*;
--- a/crates/trie/common/src/proofs.rs
+++ b/crates/trie/common/src/proofs.rs
@@ -1,7 +1,10 @@
 //! Merkle trie proofs.
-
+#[allow(missing_docs)]
 use crate::{BranchNodeMasksMap, Nibbles, ProofTrieNode, TrieAccount};
-use alloc::{borrow::Cow, vec::Vec};
+use alloc::{
+    borrow::Cow,
+    vec::{IntoIter, Vec},
+};
 use alloy_consensus::constants::KECCAK_EMPTY;
 use alloy_primitives::{
    keccak256,
@@ -14,13 +17,14 @@ use alloy_trie::{
    proof::{verify_proof, DecodedProofNodes, ProofNodes, ProofVerificationError},
    EMPTY_ROOT_HASH,
 };
+use core::iter::Peekable;
 use derive_more::{Deref, DerefMut, IntoIterator};
 use itertools::Itertools;
 use reth_primitives_traits::Account;

 /// Proof targets map.
 #[derive(Deref, DerefMut, IntoIterator, Clone, PartialEq, Eq, Default, Debug)]
-pub struct MultiProofTargets(B256Map<B256Set>);
+pub struct MultiProofTargets(pub B256Map<B256Set>);

 impl FromIterator<(B256, B256Set)> for MultiProofTargets {
    fn from_iter<T: IntoIterator<Item = (B256, B256Set)>>(iter: T) -> Self {
@@ -90,6 +94,13 @@ impl MultiProofTargets {
        ChunkedMultiProofTargets::new(self, size)
    }

+    /// Consumes these targets and returns an iterator over chunks of at most `size` items that
+    /// keeps an account's storage slots together unless the account alone exceeds `size`.
+    ///
+    /// See [`SmartChunkedMultiProofTargets`] for more information.
+    pub fn smart_chunks(self, size: usize) -> SmartChunkedMultiProofTargets {
+        SmartChunkedMultiProofTargets::new(self, size)
+    }
+
    /// Returns the number of items that will be considered during chunking in `[Self::chunks]`.
    pub fn chunking_length(&self) -> usize {
        self.values().map(|slots| 1 + slots.len().saturating_sub(1)).sum::<usize>()
@@ -123,7 +134,7 @@ pub struct ChunkedMultiProofTargets {
 }

 impl ChunkedMultiProofTargets {
-    fn new(targets: MultiProofTargets, size: usize) -> Self {
+    pub fn new(targets: MultiProofTargets, size: usize) -> Self {
        let flattened_targets = targets
            .into_iter()
            .flat_map(|(address, slots)| {
@@ -166,6 +177,103 @@ impl Iterator for ChunkedMultiProofTargets {
    }
 }

+/// Iterator that respects account boundaries and enforces chunk limits.
+///
+/// Accounts are visited in ascending key order. An account costs as many items as it has storage
+/// slots, or one item when it has none.
+///
+/// - Accounts that fit within the chunk size limit are never split (preventing double-seeking).
+/// - Accounts with more slots than the limit are split over several chunks; their slots are
+///   sorted first so the split is deterministic.
+#[derive(Debug)]
+pub struct SmartChunkedMultiProofTargets {
+    /// Accounts that still have to be emitted, sorted by key.
+    accounts: Peekable<IntoIter<(B256, B256Set)>>,
+    /// Slots of a split account that have not been emitted yet.
+    /// Stored as: (account key, sorted list of remaining slots).
+    pending_large_account: Option<(B256, Vec<B256>)>,
+    /// Chunk size limit. Always at least 1.
+    chunk_size: usize,
+}
+
+impl SmartChunkedMultiProofTargets {
+    /// Creates an iterator that splits `targets` into chunks of at most `size` items.
+    ///
+    /// Accounts are sorted by key up front so the produced chunks do not depend on the iteration
+    /// order of the underlying map.
+    ///
+    /// A `size` of zero is treated as one. A zero limit can never be satisfied, and the iterator
+    /// would otherwise never make progress on an account that has storage slots.
+    pub fn new(targets: MultiProofTargets, size: usize) -> Self {
+        let mut accounts: Vec<(B256, B256Set)> = targets.0.into_iter().collect();
+        accounts.sort_unstable_by_key(|(address, _)| *address);
+
+        Self {
+            accounts: accounts.into_iter().peekable(),
+            pending_large_account: None,
+            chunk_size: size.max(1),
+        }
+    }
+}
+
+impl Iterator for SmartChunkedMultiProofTargets {
+    type Item = MultiProofTargets;
+
+    /// Builds the next chunk, or returns `None` once every target has been emitted.
+    ///
+    /// The chunk is filled in this order:
+    /// 1. Leftover slots of a previously split account. If more than a full chunk is left, the
+    ///    chunk consists of those slots only.
+    /// 2. Whole accounts, for as long as the next one fits into the remaining room.
+    /// 3. If the chunk is still empty, the next account is larger than a whole chunk: its slots
+    ///    are sorted, the first chunk's worth is emitted and the rest is kept for later calls.
+    fn next(&mut self) -> Option<Self::Item> {
+        // A zero limit can never be satisfied and would stall on any account that has slots, so
+        // treat it as a limit of one.
+        let limit = self.chunk_size.max(1);
+
+        let mut chunk = MultiProofTargets::default();
+        let mut load = 0;
+
+        // Handle leftovers from a previously split account.
+        if let Some((addr, mut remaining)) = self.pending_large_account.take() {
+            if remaining.len() > limit {
+                // Still more than one chunk: emit a full chunk and keep the rest pending.
+                chunk.0.insert(addr, remaining.drain(..limit).collect());
+                self.pending_large_account = Some((addr, remaining));
+                return Some(chunk);
+            }
+
+            // The tail fits; the rest of the chunk may be filled with other accounts.
+            load = remaining.len();
+            chunk.0.insert(addr, remaining.into_iter().collect());
+        }
+
+        // Take whole accounts while they fit. `load` never exceeds `limit`, so the subtraction
+        // cannot underflow.
+        loop {
+            let room = limit - load;
+            let Some((addr, slots)) =
+                self.accounts.next_if(|(_, slots)| slots.len().max(1) <= room)
+            else {
+                break;
+            };
+
+            // An account without storage slots still costs one item.
+            load += slots.len().max(1);
+            chunk.0.insert(addr, slots);
+        }
+
+        if load == 0 {
+            // Nothing fit into an empty chunk, so the next account (if any) has more slots than
+            // the limit and must be split.
+            let (addr, slots) = self.accounts.next()?;
+
+            // Sort the slots so the split is deterministic.
+            let mut sorted: Vec<B256> = slots.into_iter().collect();
+            sorted.sort_unstable();
+
+            chunk.0.insert(addr, sorted.drain(..limit).collect());
+            self.pending_large_account = Some((addr, sorted));
+        }
+
+        Some(chunk)
+    }
+}
+
 /// The state multiproof of target accounts and multiproofs of their storage tries.
 /// Multiproof is effectively a state subtrie that only contains the nodes
 /// in the paths of target accounts.