perf(trie): add FromIterator for HashedPostState and simplify from_bundle_state (#20653)

This commit is contained in:
Matthias Seitz
2025-12-28 12:29:07 +01:00
committed by GitHub
parent cb1de1ac19
commit 05b3a8668c
3 changed files with 184 additions and 27 deletions

View File

@@ -136,3 +136,8 @@ rayon = ["dep:rayon"]
[[bench]]
name = "prefix_set"
harness = false
[[bench]]
name = "hashed_state"
harness = false
required-features = ["rayon"]

View File

@@ -0,0 +1,106 @@
#![allow(missing_docs, unreachable_pub)]
use alloy_primitives::{B256, U256};
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use reth_primitives_traits::Account;
use reth_trie_common::{HashedPostState, HashedStorage};
/// Generate test data: (`hashed_address`, account, storage)
fn generate_test_data(size: usize) -> Vec<(B256, Option<Account>, Option<HashedStorage>)> {
(0..size)
.map(|i| {
let hashed_address = B256::from(keccak256_u64(i as u64));
let account = if i % 10 == 0 {
// 10% destroyed accounts
None
} else {
Some(Account {
nonce: i as u64,
balance: U256::from(i * 1000),
bytecode_hash: None,
})
};
let storage = (i % 3 == 0).then(|| {
let mut storage = HashedStorage::new(false);
// Add 5 storage slots per account with storage
for j in 0..5 {
storage
.storage
.insert(B256::from(keccak256_u64((i * 100 + j) as u64)), U256::from(j));
}
storage
});
(hashed_address, account, storage)
})
.collect()
}
/// Simple keccak256 mock for benchmarking (to avoid crypto overhead in bench)
fn keccak256_u64(n: u64) -> [u8; 32] {
let mut bytes = [0u8; 32];
bytes[24..].copy_from_slice(&n.to_be_bytes());
// XOR with some pattern to spread bits
for (i, item) in bytes.iter_mut().enumerate() {
*item ^= ((n.wrapping_mul(0x9e3779b97f4a7c15) >> (i * 8)) & 0xff) as u8;
}
bytes
}
/// Comparison implementation: fold + reduce with `HashMap` (not used, kept for benchmarking)
fn from_par_iter_fold_reduce(
data: Vec<(B256, Option<Account>, Option<HashedStorage>)>,
) -> HashedPostState {
data.into_par_iter()
.fold(HashedPostState::default, |mut acc, (hashed_address, info, hashed_storage)| {
acc.accounts.insert(hashed_address, info);
if let Some(storage) = hashed_storage {
acc.storages.insert(hashed_address, storage);
}
acc
})
.reduce(HashedPostState::default, |mut a, b| {
a.extend(b);
a
})
}
/// Current implementation: collect to Vec (using rayon's optimized parallel collect), then
/// sequentially collect to `HashedPostState`
fn from_par_iter_collect_twice(
data: Vec<(B256, Option<Account>, Option<HashedStorage>)>,
) -> HashedPostState {
let vec: Vec<_> = data.into_par_iter().collect();
vec.into_iter().collect()
}
fn bench_from_parallel_iterator(c: &mut Criterion) {
let mut group = c.benchmark_group("HashedPostState::from_par_iter");
for size in [100, 1_000, 10_000, 50_000] {
let data = generate_test_data(size);
group.throughput(Throughput::Elements(size as u64));
group.bench_with_input(BenchmarkId::new("fold_reduce", size), &data, |b, data| {
b.iter(|| {
let result = from_par_iter_fold_reduce(black_box(data.clone()));
black_box(result);
});
});
group.bench_with_input(BenchmarkId::new("collect_twice", size), &data, |b, data| {
b.iter(|| {
let result = from_par_iter_collect_twice(black_box(data.clone()));
black_box(result);
});
});
}
group.finish();
}
criterion_group!(benches, bench_from_parallel_iterator);
criterion_main!(benches);

View File

@@ -18,7 +18,7 @@ pub use rayon::*;
use reth_primitives_traits::Account;
#[cfg(feature = "rayon")]
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use rayon::prelude::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use revm_database::{AccountStatus, BundleAccount};
@@ -50,7 +50,7 @@ impl HashedPostState {
pub fn from_bundle_state<'a, KH: KeyHasher>(
state: impl IntoParallelIterator<Item = (&'a Address, &'a BundleAccount)>,
) -> Self {
let hashed = state
state
.into_par_iter()
.map(|(address, account)| {
let hashed_address = KH::hash_key(address);
@@ -59,19 +59,14 @@ impl HashedPostState {
account.status,
account.storage.iter().map(|(slot, value)| (slot, &value.present_value)),
);
(hashed_address, (hashed_account, hashed_storage))
})
.collect::<Vec<(B256, (Option<Account>, HashedStorage))>>();
let mut accounts = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
let mut storages = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
for (address, (account, storage)) in hashed {
accounts.insert(address, account);
if !storage.is_empty() {
storages.insert(address, storage);
}
}
Self { accounts, storages }
(
hashed_address,
hashed_account,
(!hashed_storage.is_empty()).then_some(hashed_storage),
)
})
.collect()
}
/// Initialize [`HashedPostState`] from bundle state.
@@ -81,7 +76,7 @@ impl HashedPostState {
pub fn from_bundle_state<'a, KH: KeyHasher>(
state: impl IntoIterator<Item = (&'a Address, &'a BundleAccount)>,
) -> Self {
let hashed = state
state
.into_iter()
.map(|(address, account)| {
let hashed_address = KH::hash_key(address);
@@ -90,19 +85,14 @@ impl HashedPostState {
account.status,
account.storage.iter().map(|(slot, value)| (slot, &value.present_value)),
);
(hashed_address, (hashed_account, hashed_storage))
})
.collect::<Vec<(B256, (Option<Account>, HashedStorage))>>();
let mut accounts = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
let mut storages = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
for (address, (account, storage)) in hashed {
accounts.insert(address, account);
if !storage.is_empty() {
storages.insert(address, storage);
}
}
Self { accounts, storages }
(
hashed_address,
hashed_account,
(!hashed_storage.is_empty()).then_some(hashed_storage),
)
})
.collect()
}
/// Construct [`HashedPostState`] from a single [`HashedStorage`].
@@ -398,6 +388,62 @@ impl HashedPostState {
}
}
impl FromIterator<(B256, Option<Account>, Option<HashedStorage>)> for HashedPostState {
/// Constructs a [`HashedPostState`] from an iterator of tuples containing:
/// - Hashed address (B256)
/// - Optional account info (`None` indicates destroyed account)
/// - Optional hashed storage
///
/// # Important
///
/// - The iterator **assumes unique hashed addresses** (B256). If duplicate addresses are
/// present, later entries will overwrite earlier ones for accounts, and storage will be
/// merged.
/// - The [`HashedStorage`] **must not be empty** (as determined by
/// [`HashedStorage::is_empty`]). Empty storage should be represented as `None` rather than
/// `Some(empty_storage)`. This ensures the storage map only contains meaningful entries.
///
/// Use `(!storage.is_empty()).then_some(storage)` to convert empty storage to `None`.
fn from_iter<T: IntoIterator<Item = (B256, Option<Account>, Option<HashedStorage>)>>(
iter: T,
) -> Self {
let iter = iter.into_iter();
let (lower, _) = iter.size_hint();
let mut hashed_state = Self::with_capacity(lower);
for (hashed_address, info, hashed_storage) in iter {
hashed_state.accounts.insert(hashed_address, info);
if let Some(storage) = hashed_storage {
hashed_state.storages.insert(hashed_address, storage);
}
}
hashed_state
}
}
#[cfg(feature = "rayon")]
impl FromParallelIterator<(B256, Option<Account>, Option<HashedStorage>)> for HashedPostState {
/// Parallel version of [`FromIterator`] for constructing [`HashedPostState`] from a parallel
/// iterator.
///
/// See [`FromIterator::from_iter`] for details on the expected input format.
///
/// # Important
///
/// - The iterator **assumes unique hashed addresses** (B256). If duplicate addresses are
/// present, later entries will overwrite earlier ones for accounts, and storage will be
/// merged.
/// - The [`HashedStorage`] **must not be empty**. Empty storage should be `None`.
fn from_par_iter<I>(par_iter: I) -> Self
where
I: IntoParallelIterator<Item = (B256, Option<Account>, Option<HashedStorage>)>,
{
let vec: Vec<_> = par_iter.into_par_iter().collect();
vec.into_iter().collect()
}
}
/// Representation of in-memory hashed storage.
#[derive(PartialEq, Eq, Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]