mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-04-30 03:01:58 -04:00
Compare commits
2 Commits
push
...
pr-21432-r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fd8bbf3dbc | ||
|
|
e4830fbab0 |
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -11135,8 +11135,12 @@ dependencies = [
|
||||
"plain_hasher",
|
||||
"proptest",
|
||||
"proptest-arbitrary-interop",
|
||||
"rand 0.9.2",
|
||||
"rayon",
|
||||
"reth-codecs",
|
||||
"reth-db",
|
||||
"reth-db-api",
|
||||
"reth-primitives",
|
||||
"reth-primitives-traits",
|
||||
"revm-database",
|
||||
"revm-state",
|
||||
|
||||
@@ -362,11 +362,12 @@ impl VersionedMultiProofTargets {
|
||||
}
|
||||
|
||||
/// Chunks this `VersionedMultiProofTargets` into smaller chunks of the given size.
|
||||
///
|
||||
/// Uses smart chunking for legacy targets to respect account boundaries and prevent
|
||||
/// double-seeking when processing proofs.
|
||||
fn chunks(self, chunk_size: usize) -> Box<dyn Iterator<Item = Self>> {
|
||||
match self {
|
||||
Self::Legacy(targets) => {
|
||||
Box::new(MultiProofTargets::chunks(targets, chunk_size).map(Self::Legacy))
|
||||
}
|
||||
Self::Legacy(targets) => Box::new(targets.smart_chunks(chunk_size).map(Self::Legacy)),
|
||||
Self::V2(targets) => {
|
||||
Box::new(ChunkedMultiProofTargetsV2::new(targets, chunk_size).map(Self::V2))
|
||||
}
|
||||
|
||||
@@ -46,6 +46,11 @@ arbitrary = { workspace = true, features = ["derive"], optional = true }
|
||||
rayon = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
reth-db = { workspace = true, features = ["test-utils", "mdbx"] }
|
||||
reth-db-api = { workspace = true }
|
||||
reth-primitives = { workspace = true }
|
||||
rand = { workspace = true }
|
||||
|
||||
reth-primitives-traits = { workspace = true, features = ["serde"] }
|
||||
reth-codecs.workspace = true
|
||||
alloy-genesis.workspace = true
|
||||
@@ -141,3 +146,7 @@ harness = false
|
||||
name = "hashed_state"
|
||||
harness = false
|
||||
required-features = ["rayon"]
|
||||
|
||||
[[bench]]
|
||||
name = "smart_chunking"
|
||||
harness = false
|
||||
|
||||
152
crates/trie/common/benches/smart_chunking.rs
Normal file
152
crates/trie/common/benches/smart_chunking.rs
Normal file
@@ -0,0 +1,152 @@
|
||||
#![allow(missing_docs, unreachable_pub)]
|
||||
|
||||
use alloy_primitives::{keccak256, map::HashSet, Address, B256, U256};
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use rand::{rngs::StdRng, Rng, SeedableRng};
|
||||
use reth_db::{
|
||||
cursor::DbDupCursorRO,
|
||||
tables,
|
||||
test_utils::create_test_rw_db,
|
||||
transaction::{DbTx, DbTxMut},
|
||||
Database, DatabaseEnv,
|
||||
};
|
||||
use reth_primitives::StorageEntry;
|
||||
use reth_trie_common::proofs::{
|
||||
ChunkedMultiProofTargets, MultiProofTargets, SmartChunkedMultiProofTargets,
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
// Chunk size handed to both chunking strategies so they are compared under the same limit.
|
||||
const CHUNK_SIZE: usize = 60;
|
||||
|
||||
// fn seed_fragmentation_db(num_accounts: usize, slots_per_account: usize) ->
|
||||
// (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) { let db =
|
||||
// create_test_rw_db(); let tx = db.tx_mut().expect("failed to create rw tx");
|
||||
// let mut targets = MultiProofTargets::default();
|
||||
|
||||
// for i in 0..num_accounts {
|
||||
// let address = Address::from_word(B256::from(U256::from(i)));
|
||||
// let hashed_address = keccak256(address);
|
||||
|
||||
// let mut slot_keys = HashSet::default();
|
||||
|
||||
// for j in 0..slots_per_account {
|
||||
// let slot_key = B256::from(U256::from(j));
|
||||
// let hashed_slot = keccak256(slot_key);
|
||||
// let value = U256::from(1);
|
||||
|
||||
// tx.put::<tables::HashedStorages>(
|
||||
// hashed_address,
|
||||
// StorageEntry { key: hashed_slot, value }
|
||||
// ).expect("failed to insert");
|
||||
|
||||
// slot_keys.insert(hashed_slot);
|
||||
// }
|
||||
|
||||
// targets.0.insert(hashed_address, slot_keys);
|
||||
// }
|
||||
|
||||
// tx.commit().expect("failed to commit");
|
||||
// (db, targets)
|
||||
// }
|
||||
|
||||
fn seed_realistic_db(
|
||||
num_accounts: usize,
|
||||
) -> (Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>, MultiProofTargets) {
|
||||
let db = create_test_rw_db();
|
||||
let tx = db.tx_mut().expect("failed to create rw tx");
|
||||
let mut targets = MultiProofTargets::default();
|
||||
|
||||
let mut rng = StdRng::seed_from_u64(42);
|
||||
|
||||
for i in 0..num_accounts {
|
||||
let address = Address::from_word(B256::from(U256::from(i)));
|
||||
let hashed_address = keccak256(address);
|
||||
|
||||
// Distribution
|
||||
// 80% 1-5 slots
|
||||
// 15% 10-50 slots
|
||||
// 5% 100-300 slots
|
||||
let roll = rng.random_range(0..100);
|
||||
let slots_count = if roll < 80 {
|
||||
rng.random_range(1..=5)
|
||||
} else if roll < 95 {
|
||||
rng.random_range(10..=50)
|
||||
} else {
|
||||
rng.random_range(100..=300)
|
||||
};
|
||||
|
||||
let mut slot_keys = HashSet::default();
|
||||
|
||||
for j in 0..slots_count {
|
||||
let slot_key = B256::from(U256::from(j));
|
||||
let hashed_slot = keccak256(slot_key);
|
||||
let value = U256::from(1);
|
||||
|
||||
tx.put::<tables::HashedStorages>(
|
||||
hashed_address,
|
||||
StorageEntry { key: hashed_slot, value },
|
||||
)
|
||||
.expect("failed to insert");
|
||||
|
||||
slot_keys.insert(hashed_slot);
|
||||
}
|
||||
|
||||
targets.0.insert(hashed_address, slot_keys);
|
||||
}
|
||||
|
||||
tx.commit().expect("failed to commit");
|
||||
(db, targets)
|
||||
}
|
||||
|
||||
fn execute_proof_fetch(
|
||||
db: &Arc<reth_db::test_utils::TempDatabase<DatabaseEnv>>,
|
||||
chunks: Vec<MultiProofTargets>,
|
||||
) {
|
||||
let tx = db.tx().expect("ro tx");
|
||||
|
||||
for chunk in chunks {
|
||||
let mut cursor = tx.cursor_read::<tables::HashedStorages>().expect("cursor");
|
||||
|
||||
for (hashed_addr, slots) in chunk.0 {
|
||||
// Seeking the Account
|
||||
if let Some(_entry) = cursor.seek_by_key_subkey(hashed_addr, B256::ZERO).expect("seek")
|
||||
{
|
||||
for slot in slots {
|
||||
black_box(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_chunking_strategies(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("Smart_Chunking_Experiment");
|
||||
|
||||
let num_accounts = 1000;
|
||||
|
||||
let (db, targets) = seed_realistic_db(num_accounts);
|
||||
|
||||
group.bench_function("old_chunking_logic", |b| {
|
||||
b.iter(|| {
|
||||
let t = targets.clone();
|
||||
let chunker = ChunkedMultiProofTargets::new(t, CHUNK_SIZE);
|
||||
let chunks: Vec<_> = chunker.collect();
|
||||
execute_proof_fetch(&db, chunks);
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("smart_chunking_logic", |b| {
|
||||
b.iter(|| {
|
||||
let t = targets.clone();
|
||||
let chunker = SmartChunkedMultiProofTargets::new(t, CHUNK_SIZE);
|
||||
let chunks: Vec<_> = chunker.collect();
|
||||
execute_proof_fetch(&db, chunks);
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_chunking_strategies);
|
||||
criterion_main!(benches);
|
||||
@@ -52,7 +52,7 @@ pub use trie::{BranchNodeMasks, BranchNodeMasksMap, ProofTrieNode};
|
||||
/// The container indicates when the trie has been modified.
|
||||
pub mod prefix_set;
|
||||
|
||||
mod proofs;
|
||||
pub mod proofs;
|
||||
#[cfg(any(test, feature = "test-utils"))]
|
||||
pub use proofs::triehash;
|
||||
pub use proofs::*;
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
//! Merkle trie proofs.
|
||||
|
||||
#[allow(missing_docs)]
|
||||
use crate::{BranchNodeMasksMap, Nibbles, ProofTrieNode, TrieAccount};
|
||||
use alloc::{borrow::Cow, vec::Vec};
|
||||
use alloc::{
|
||||
borrow::Cow,
|
||||
vec::{IntoIter, Vec},
|
||||
};
|
||||
use alloy_consensus::constants::KECCAK_EMPTY;
|
||||
use alloy_primitives::{
|
||||
keccak256,
|
||||
@@ -14,13 +17,14 @@ use alloy_trie::{
|
||||
proof::{verify_proof, DecodedProofNodes, ProofNodes, ProofVerificationError},
|
||||
EMPTY_ROOT_HASH,
|
||||
};
|
||||
use core::iter::Peekable;
|
||||
use derive_more::{Deref, DerefMut, IntoIterator};
|
||||
use itertools::Itertools;
|
||||
use reth_primitives_traits::Account;
|
||||
|
||||
/// Proof targets map.
|
||||
#[derive(Deref, DerefMut, IntoIterator, Clone, PartialEq, Eq, Default, Debug)]
|
||||
pub struct MultiProofTargets(B256Map<B256Set>);
|
||||
pub struct MultiProofTargets(pub B256Map<B256Set>);
|
||||
|
||||
impl FromIterator<(B256, B256Set)> for MultiProofTargets {
|
||||
fn from_iter<T: IntoIterator<Item = (B256, B256Set)>>(iter: T) -> Self {
|
||||
@@ -90,6 +94,13 @@ impl MultiProofTargets {
|
||||
ChunkedMultiProofTargets::new(self, size)
|
||||
}
|
||||
|
||||
/// Returns an iterator that yields chunks respecting account boundaries.
|
||||
///
|
||||
/// See [`SmartChunkedMultiProofTargets`] for more information.
|
||||
pub fn smart_chunks(self, size: usize) -> SmartChunkedMultiProofTargets {
|
||||
SmartChunkedMultiProofTargets::new(self, size)
|
||||
}
|
||||
|
||||
/// Returns the number of items that will be considered during chunking in `[Self::chunks]`.
|
||||
pub fn chunking_length(&self) -> usize {
|
||||
self.values().map(|slots| 1 + slots.len().saturating_sub(1)).sum::<usize>()
|
||||
@@ -123,7 +134,7 @@ pub struct ChunkedMultiProofTargets {
|
||||
}
|
||||
|
||||
impl ChunkedMultiProofTargets {
|
||||
fn new(targets: MultiProofTargets, size: usize) -> Self {
|
||||
pub fn new(targets: MultiProofTargets, size: usize) -> Self {
|
||||
let flattened_targets = targets
|
||||
.into_iter()
|
||||
.flat_map(|(address, slots)| {
|
||||
@@ -166,6 +177,103 @@ impl Iterator for ChunkedMultiProofTargets {
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator that respects account boundaries and enforces chunk limits.
|
||||
///
|
||||
/// - Small accounts are never split (preventing double-seeking).
|
||||
/// - Large accounts are split into multiple full chunks.
|
||||
#[derive(Debug)]
|
||||
pub struct SmartChunkedMultiProofTargets {
|
||||
/// List of accounts to process.
|
||||
accounts: Peekable<IntoIter<(B256, B256Set)>>,
|
||||
/// If an account was too big, its remaining slots wait here for the next pass.
|
||||
/// stored as: (Address, Sorted List of Slots)
|
||||
pending_large_account: Option<(B256, Vec<B256>)>,
|
||||
/// Chunk size limit.
|
||||
chunk_size: usize,
|
||||
}
|
||||
|
||||
impl SmartChunkedMultiProofTargets {
|
||||
pub fn new(targets: MultiProofTargets, size: usize) -> Self {
|
||||
// Flatten and Sort Accounts
|
||||
let mut accounts_vec: Vec<(B256, B256Set)> = targets.0.into_iter().collect();
|
||||
accounts_vec.sort_unstable_by(|a, b| a.0.cmp(&b.0));
|
||||
|
||||
Self {
|
||||
accounts: accounts_vec.into_iter().peekable(),
|
||||
pending_large_account: None,
|
||||
chunk_size: size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for SmartChunkedMultiProofTargets {
|
||||
type Item = MultiProofTargets;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let mut current_chunk = MultiProofTargets::default();
|
||||
let mut current_load = 0;
|
||||
|
||||
// Handle leftovers from a previous big account
|
||||
if let Some((addr, mut remaining_slots)) = self.pending_large_account.take() {
|
||||
let take_count = self.chunk_size;
|
||||
|
||||
if remaining_slots.len() > take_count {
|
||||
// Still doesn't fit
|
||||
// Slice off `chunk_size` slots
|
||||
let chunk_slots: Vec<B256> = remaining_slots.drain(0..take_count).collect();
|
||||
// Add to chunk
|
||||
current_chunk.0.insert(addr, chunk_slots.into_iter().collect());
|
||||
// Put the rest back in pending
|
||||
self.pending_large_account = Some((addr, remaining_slots));
|
||||
|
||||
return Some(current_chunk);
|
||||
} else {
|
||||
// It fits. Logic proceeds to fill the rest of the chunk with other accounts
|
||||
current_load += remaining_slots.len();
|
||||
current_chunk.0.insert(addr, remaining_slots.into_iter().collect());
|
||||
}
|
||||
}
|
||||
|
||||
// Process normal accounts
|
||||
while let Some((_address, slots)) = self.accounts.peek() {
|
||||
let account_cost = if slots.is_empty() { 1 } else { slots.len() };
|
||||
|
||||
// Check fit
|
||||
if current_load + account_cost > self.chunk_size {
|
||||
if current_load == 0 {
|
||||
// The chunk is empty, but the account is big.
|
||||
let (addr, slots_set) = self.accounts.next().unwrap();
|
||||
// Convert Set to Sorted Vec (deterministic splitting)
|
||||
let mut sorted_slots: Vec<B256> = slots_set.into_iter().collect();
|
||||
sorted_slots.sort_unstable();
|
||||
// Take what fits
|
||||
let take_count = self.chunk_size;
|
||||
let chunk_slots: Vec<B256> = sorted_slots.drain(0..take_count).collect();
|
||||
// Save the rest
|
||||
self.pending_large_account = Some((addr, sorted_slots));
|
||||
|
||||
current_chunk.0.insert(addr, chunk_slots.into_iter().collect());
|
||||
return Some(current_chunk);
|
||||
} else {
|
||||
// Chunk has data, account doesn't fit.
|
||||
return Some(current_chunk);
|
||||
}
|
||||
}
|
||||
|
||||
// It fits. Consume it.
|
||||
let (addr, slots) = self.accounts.next().unwrap();
|
||||
current_load += account_cost;
|
||||
current_chunk.0.insert(addr, slots);
|
||||
}
|
||||
|
||||
if current_chunk.0.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(current_chunk)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The state multiproof of target accounts and multiproofs of their storage tries.
|
||||
/// Multiproof is effectively a state subtrie that only contains the nodes
|
||||
/// in the paths of target accounts.
|
||||
|
||||
Reference in New Issue
Block a user