Mirror of https://github.com/paradigmxyz/reth.git (synced 2026-01-09 07:17:56 -05:00)
perf(trie): add FromIterator for HashedPostState and simplify from_bundle_state (#20653)
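In short, the diff below adds `FromIterator` (and, behind the `rayon` feature, `FromParallelIterator`) impls so that a `HashedPostState` can be built directly via `.collect()` from `(B256, Option<Account>, Option<HashedStorage>)` tuples, and rewrites `from_bundle_state` on top of them. A minimal usage sketch, illustrative only and not part of the commit (crate, type, and field names are taken from the diff below):

use alloy_primitives::{B256, U256};
use reth_primitives_traits::Account;
use reth_trie_common::{HashedPostState, HashedStorage};

fn main() {
    // A live account with one storage slot.
    let mut storage = HashedStorage::new(false);
    storage.storage.insert(B256::repeat_byte(0x01), U256::from(42u64));

    let entries = vec![
        (
            B256::repeat_byte(0xaa),
            Some(Account { nonce: 1, balance: U256::from(1_000u64), bytecode_hash: None }),
            // Per the new impl's docs, empty storage must be passed as `None`.
            (!storage.is_empty()).then_some(storage),
        ),
        // A destroyed account is represented by `None`.
        (B256::repeat_byte(0xbb), None, None),
    ];

    // The new `FromIterator` impl lets the tuples collect directly into a `HashedPostState`.
    let state: HashedPostState = entries.into_iter().collect();
    assert_eq!(state.accounts.len(), 2);
    assert_eq!(state.storages.len(), 1);
}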
@@ -136,3 +136,8 @@ rayon = ["dep:rayon"]
 [[bench]]
 name = "prefix_set"
 harness = false
+
+[[bench]]
+name = "hashed_state"
+harness = false
+required-features = ["rayon"]
crates/trie/common/benches/hashed_state.rs (new file, 106 lines)
@@ -0,0 +1,106 @@
+#![allow(missing_docs, unreachable_pub)]
+use alloy_primitives::{B256, U256};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
+use rayon::prelude::{IntoParallelIterator, ParallelIterator};
+use reth_primitives_traits::Account;
+use reth_trie_common::{HashedPostState, HashedStorage};
+
+/// Generate test data: (`hashed_address`, account, storage)
+fn generate_test_data(size: usize) -> Vec<(B256, Option<Account>, Option<HashedStorage>)> {
+    (0..size)
+        .map(|i| {
+            let hashed_address = B256::from(keccak256_u64(i as u64));
+
+            let account = if i % 10 == 0 {
+                // 10% destroyed accounts
+                None
+            } else {
+                Some(Account {
+                    nonce: i as u64,
+                    balance: U256::from(i * 1000),
+                    bytecode_hash: None,
+                })
+            };
+
+            let storage = (i % 3 == 0).then(|| {
+                let mut storage = HashedStorage::new(false);
+                // Add 5 storage slots per account with storage
+                for j in 0..5 {
+                    storage
+                        .storage
+                        .insert(B256::from(keccak256_u64((i * 100 + j) as u64)), U256::from(j));
+                }
+                storage
+            });
+
+            (hashed_address, account, storage)
+        })
+        .collect()
+}
+
+/// Simple keccak256 mock for benchmarking (to avoid crypto overhead in bench)
+fn keccak256_u64(n: u64) -> [u8; 32] {
+    let mut bytes = [0u8; 32];
+    bytes[24..].copy_from_slice(&n.to_be_bytes());
+    // XOR with some pattern to spread bits
+    for (i, item) in bytes.iter_mut().enumerate() {
+        *item ^= ((n.wrapping_mul(0x9e3779b97f4a7c15) >> (i * 8)) & 0xff) as u8;
+    }
+    bytes
+}
+
+/// Comparison implementation: fold + reduce with `HashMap` (not used, kept for benchmarking)
+fn from_par_iter_fold_reduce(
+    data: Vec<(B256, Option<Account>, Option<HashedStorage>)>,
+) -> HashedPostState {
+    data.into_par_iter()
+        .fold(HashedPostState::default, |mut acc, (hashed_address, info, hashed_storage)| {
+            acc.accounts.insert(hashed_address, info);
+            if let Some(storage) = hashed_storage {
+                acc.storages.insert(hashed_address, storage);
+            }
+            acc
+        })
+        .reduce(HashedPostState::default, |mut a, b| {
+            a.extend(b);
+            a
+        })
+}
+
+/// Current implementation: collect to Vec (using rayon's optimized parallel collect), then
+/// sequentially collect to `HashedPostState`
+fn from_par_iter_collect_twice(
+    data: Vec<(B256, Option<Account>, Option<HashedStorage>)>,
+) -> HashedPostState {
+    let vec: Vec<_> = data.into_par_iter().collect();
+    vec.into_iter().collect()
+}
+
+fn bench_from_parallel_iterator(c: &mut Criterion) {
+    let mut group = c.benchmark_group("HashedPostState::from_par_iter");
+
+    for size in [100, 1_000, 10_000, 50_000] {
+        let data = generate_test_data(size);
+
+        group.throughput(Throughput::Elements(size as u64));
+
+        group.bench_with_input(BenchmarkId::new("fold_reduce", size), &data, |b, data| {
+            b.iter(|| {
+                let result = from_par_iter_fold_reduce(black_box(data.clone()));
+                black_box(result);
+            });
+        });
+
+        group.bench_with_input(BenchmarkId::new("collect_twice", size), &data, |b, data| {
+            b.iter(|| {
+                let result = from_par_iter_collect_twice(black_box(data.clone()));
+                black_box(result);
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_from_parallel_iterator);
+criterion_main!(benches);
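For context on what the two variants above measure: `from_par_iter_fold_reduce` merges per-thread partial states, while `from_par_iter_collect_twice` mirrors the `FromParallelIterator` impl added later in this diff (parallel collect into a `Vec`, then a sequential collect). A hedged sketch of the resulting call-site usage; the helper name `build_state_par` and the sample data are illustrative, while the crate and type names come from the benchmark above:

use alloy_primitives::{B256, U256};
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use reth_primitives_traits::Account;
use reth_trie_common::{HashedPostState, HashedStorage};

/// Illustrative helper: build a `HashedPostState` from pre-hashed entries in parallel,
/// relying on the `FromParallelIterator` impl introduced later in this diff.
fn build_state_par(
    data: Vec<(B256, Option<Account>, Option<HashedStorage>)>,
) -> HashedPostState {
    data.into_par_iter().collect()
}

fn main() {
    let data = vec![
        (
            B256::repeat_byte(0x01),
            Some(Account { nonce: 7, balance: U256::from(100u64), bytecode_hash: None }),
            None,
        ),
        (B256::repeat_byte(0x02), None, None),
    ];
    let state = build_state_par(data);
    assert_eq!(state.accounts.len(), 2);
}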
@@ -18,7 +18,7 @@ pub use rayon::*;
 use reth_primitives_traits::Account;
 
 #[cfg(feature = "rayon")]
-use rayon::prelude::{IntoParallelIterator, ParallelIterator};
+use rayon::prelude::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
 
 use revm_database::{AccountStatus, BundleAccount};
 
@@ -50,7 +50,7 @@ impl HashedPostState {
     pub fn from_bundle_state<'a, KH: KeyHasher>(
         state: impl IntoParallelIterator<Item = (&'a Address, &'a BundleAccount)>,
     ) -> Self {
-        let hashed = state
+        state
             .into_par_iter()
             .map(|(address, account)| {
                 let hashed_address = KH::hash_key(address);
@@ -59,19 +59,14 @@ impl HashedPostState {
                     account.status,
                     account.storage.iter().map(|(slot, value)| (slot, &value.present_value)),
                 );
-                (hashed_address, (hashed_account, hashed_storage))
-            })
-            .collect::<Vec<(B256, (Option<Account>, HashedStorage))>>();
-
-        let mut accounts = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
-        let mut storages = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
-        for (address, (account, storage)) in hashed {
-            accounts.insert(address, account);
-            if !storage.is_empty() {
-                storages.insert(address, storage);
-            }
-        }
-        Self { accounts, storages }
+                (
+                    hashed_address,
+                    hashed_account,
+                    (!hashed_storage.is_empty()).then_some(hashed_storage),
+                )
+            })
+            .collect()
     }
 
     /// Initialize [`HashedPostState`] from bundle state.
@@ -81,7 +76,7 @@ impl HashedPostState {
     pub fn from_bundle_state<'a, KH: KeyHasher>(
         state: impl IntoIterator<Item = (&'a Address, &'a BundleAccount)>,
     ) -> Self {
-        let hashed = state
+        state
             .into_iter()
             .map(|(address, account)| {
                 let hashed_address = KH::hash_key(address);
@@ -90,19 +85,14 @@ impl HashedPostState {
                     account.status,
                     account.storage.iter().map(|(slot, value)| (slot, &value.present_value)),
                 );
-                (hashed_address, (hashed_account, hashed_storage))
-            })
-            .collect::<Vec<(B256, (Option<Account>, HashedStorage))>>();
-
-        let mut accounts = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
-        let mut storages = HashMap::with_capacity_and_hasher(hashed.len(), Default::default());
-        for (address, (account, storage)) in hashed {
-            accounts.insert(address, account);
-            if !storage.is_empty() {
-                storages.insert(address, storage);
-            }
-        }
-        Self { accounts, storages }
+                (
+                    hashed_address,
+                    hashed_account,
+                    (!hashed_storage.is_empty()).then_some(hashed_storage),
+                )
+            })
+            .collect()
     }
 
     /// Construct [`HashedPostState`] from a single [`HashedStorage`].
@@ -398,6 +388,62 @@ impl HashedPostState {
     }
 }
 
+impl FromIterator<(B256, Option<Account>, Option<HashedStorage>)> for HashedPostState {
+    /// Constructs a [`HashedPostState`] from an iterator of tuples containing:
+    /// - Hashed address (B256)
+    /// - Optional account info (`None` indicates destroyed account)
+    /// - Optional hashed storage
+    ///
+    /// # Important
+    ///
+    /// - The iterator **assumes unique hashed addresses** (B256). If duplicate addresses are
+    ///   present, later entries will overwrite earlier ones for accounts, and storage will be
+    ///   merged.
+    /// - The [`HashedStorage`] **must not be empty** (as determined by
+    ///   [`HashedStorage::is_empty`]). Empty storage should be represented as `None` rather than
+    ///   `Some(empty_storage)`. This ensures the storage map only contains meaningful entries.
+    ///
+    /// Use `(!storage.is_empty()).then_some(storage)` to convert empty storage to `None`.
+    fn from_iter<T: IntoIterator<Item = (B256, Option<Account>, Option<HashedStorage>)>>(
+        iter: T,
+    ) -> Self {
+        let iter = iter.into_iter();
+        let (lower, _) = iter.size_hint();
+        let mut hashed_state = Self::with_capacity(lower);
+
+        for (hashed_address, info, hashed_storage) in iter {
+            hashed_state.accounts.insert(hashed_address, info);
+            if let Some(storage) = hashed_storage {
+                hashed_state.storages.insert(hashed_address, storage);
+            }
+        }
+
+        hashed_state
+    }
+}
+
+#[cfg(feature = "rayon")]
+impl FromParallelIterator<(B256, Option<Account>, Option<HashedStorage>)> for HashedPostState {
+    /// Parallel version of [`FromIterator`] for constructing [`HashedPostState`] from a parallel
+    /// iterator.
+    ///
+    /// See [`FromIterator::from_iter`] for details on the expected input format.
+    ///
+    /// # Important
+    ///
+    /// - The iterator **assumes unique hashed addresses** (B256). If duplicate addresses are
+    ///   present, later entries will overwrite earlier ones for accounts, and storage will be
+    ///   merged.
+    /// - The [`HashedStorage`] **must not be empty**. Empty storage should be `None`.
+    fn from_par_iter<I>(par_iter: I) -> Self
+    where
+        I: IntoParallelIterator<Item = (B256, Option<Account>, Option<HashedStorage>)>,
+    {
+        let vec: Vec<_> = par_iter.into_par_iter().collect();
+        vec.into_iter().collect()
+    }
+}
+
 /// Representation of in-memory hashed storage.
 #[derive(PartialEq, Eq, Clone, Debug, Default)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
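The duplicate-address caveat documented above can be made concrete with a small sketch (illustrative, not part of the commit): the impl inserts into maps keyed by the hashed address, so a later account entry for the same address replaces the earlier one.

use alloy_primitives::{B256, U256};
use reth_primitives_traits::Account;
use reth_trie_common::{HashedPostState, HashedStorage};

fn main() {
    let addr = B256::repeat_byte(0x11);
    let first = Account { nonce: 1, balance: U256::ZERO, bytecode_hash: None };
    let second = Account { nonce: 2, balance: U256::ZERO, bytecode_hash: None };

    // Two entries with the same hashed address: the later one wins in the account map.
    let entries: Vec<(B256, Option<Account>, Option<HashedStorage>)> =
        vec![(addr, Some(first), None), (addr, Some(second), None)];
    let state: HashedPostState = entries.into_iter().collect();

    assert_eq!(state.accounts.len(), 1);
    let nonce = state.accounts.get(&addr).and_then(|acc| acc.as_ref()).map(|acc| acc.nonce);
    assert_eq!(nonce, Some(2));
}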