perf(trie): add HashedPostStateSorted::from_reverts (#20047)

YK
2025-12-03 13:05:23 +08:00
committed by GitHub
parent 98e9a1d09e
commit e0a6f54b42
11 changed files with 437 additions and 124 deletions
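As a rough sketch of the calling pattern this commit moves toward (the helper and its signature are hypothetical; the type, trait, and import names are taken from the diff below):

```rust
use reth_db_api::{transaction::DbTx, DatabaseError};
use reth_trie::{HashedPostStateSorted, KeccakKeyHasher};
use reth_trie_db::DatabaseHashedPostState;

// Hypothetical helper illustrating the new entry point. Previously callers wrote
//   HashedPostState::from_reverts::<KeccakKeyHasher>(tx, from_block..)?.into_sorted()
// and now the sorted representation is built in a single pass over the changesets.
fn sorted_reverts<TX: DbTx>(
    tx: &TX,
    from_block: u64,
) -> Result<HashedPostStateSorted, DatabaseError> {
    HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(tx, from_block..)
}
```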

View File

@@ -155,7 +155,7 @@ impl DeferredTrieData {
ancestors: &[Self],
) -> ComputedTrieData {
// Sort the current block's hashed state and trie updates
let sorted_hashed_state = Arc::new(hashed_state.clone().into_sorted());
let sorted_hashed_state = Arc::new(hashed_state.clone_into_sorted());
let sorted_trie_updates = Arc::new(trie_updates.clone().into_sorted());
// Merge trie data from ancestors (oldest -> newest so later state takes precedence)

View File

@@ -647,7 +647,7 @@ where
// Extend state overlay with current block's sorted state.
input.prefix_sets.extend(hashed_state.construct_prefix_sets());
let sorted_hashed_state = hashed_state.clone().into_sorted();
let sorted_hashed_state = hashed_state.clone_into_sorted();
Arc::make_mut(&mut input.state).extend_ref(&sorted_hashed_state);
let TrieInputSorted { nodes, state, prefix_sets: prefix_sets_mut } = input;

View File

@@ -12,9 +12,11 @@ use reth_stages_api::{
BlockErrorKind, ExecInput, ExecOutput, Stage, StageCheckpoint, StageError, StageId,
UnwindInput, UnwindOutput,
};
use reth_trie::{updates::TrieUpdates, HashedPostState, KeccakKeyHasher, StateRoot, TrieInput};
use reth_trie::{
updates::TrieUpdates, HashedPostStateSorted, KeccakKeyHasher, StateRoot, TrieInputSorted,
};
use reth_trie_db::{DatabaseHashedPostState, DatabaseStateRoot};
use std::ops::Range;
use std::{ops::Range, sync::Arc};
use tracing::{debug, error};
/// The `MerkleChangeSets` stage.
@@ -105,12 +107,12 @@ impl MerkleChangeSets {
Ok(target_start..target_end)
}
/// Calculates the trie updates given a [`TrieInput`], asserting that the resulting state root
/// matches the expected one for the block.
/// Calculates the trie updates given a [`TrieInputSorted`], asserting that the resulting state
/// root matches the expected one for the block.
fn calculate_block_trie_updates<Provider: DBProvider + HeaderProvider>(
provider: &Provider,
block_number: BlockNumber,
input: TrieInput,
input: TrieInputSorted,
) -> Result<TrieUpdates, StageError> {
let (root, trie_updates) =
StateRoot::overlay_root_from_nodes_with_updates(provider.tx_ref(), input).map_err(
@@ -192,21 +194,21 @@ impl MerkleChangeSets {
);
let mut per_block_state_reverts = Vec::new();
for block_number in target_range.clone() {
per_block_state_reverts.push(HashedPostState::from_reverts::<KeccakKeyHasher>(
per_block_state_reverts.push(HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(
provider.tx_ref(),
block_number..=block_number,
)?);
}
// Helper to retrieve state revert data for a specific block from the pre-computed array
let get_block_state_revert = |block_number: BlockNumber| -> &HashedPostState {
let get_block_state_revert = |block_number: BlockNumber| -> &HashedPostStateSorted {
let index = (block_number - target_start) as usize;
&per_block_state_reverts[index]
};
// Helper to accumulate state reverts from a given block to the target end
let compute_cumulative_state_revert = |block_number: BlockNumber| -> HashedPostState {
let mut cumulative_revert = HashedPostState::default();
let compute_cumulative_state_revert = |block_number: BlockNumber| -> HashedPostStateSorted {
let mut cumulative_revert = HashedPostStateSorted::default();
for n in (block_number..target_end).rev() {
cumulative_revert.extend_ref(get_block_state_revert(n))
}
@@ -216,7 +218,7 @@ impl MerkleChangeSets {
// To calculate the changeset for a block, we first need the TrieUpdates which are
// generated as a result of processing the block. To get these we need:
// 1) The TrieUpdates which revert the db's trie to _prior_ to the block
// 2) The HashedPostState to revert the db's state to _after_ the block
// 2) The HashedPostStateSorted to revert the db's state to _after_ the block
//
// To get (1) for `target_start` we need to do a big state root calculation which takes
// into account all changes between that block and db tip. For each block after the
@@ -227,12 +229,15 @@ impl MerkleChangeSets {
?target_start,
"Computing trie state at starting block",
);
let mut input = TrieInput::default();
input.state = compute_cumulative_state_revert(target_start);
input.prefix_sets = input.state.construct_prefix_sets();
let initial_state = compute_cumulative_state_revert(target_start);
let initial_prefix_sets = initial_state.construct_prefix_sets();
let initial_input =
TrieInputSorted::new(Arc::default(), Arc::new(initial_state), initial_prefix_sets);
// target_start will be >= 1, see `determine_target_range`.
input.nodes =
Self::calculate_block_trie_updates(provider, target_start - 1, input.clone())?;
let mut nodes = Arc::new(
Self::calculate_block_trie_updates(provider, target_start - 1, initial_input)?
.into_sorted(),
);
for block_number in target_range {
debug!(
@@ -242,21 +247,24 @@ impl MerkleChangeSets {
);
// Revert the state so that this block has just been processed, meaning we take the
// cumulative revert of the subsequent block.
input.state = compute_cumulative_state_revert(block_number + 1);
let state = Arc::new(compute_cumulative_state_revert(block_number + 1));
// Construct prefix sets from only this block's `HashedPostState`, because we only care
// about trie updates which occurred as a result of this block being processed.
input.prefix_sets = get_block_state_revert(block_number).construct_prefix_sets();
// Construct prefix sets from only this block's `HashedPostStateSorted`, because we only
// care about trie updates which occurred as a result of this block being processed.
let prefix_sets = get_block_state_revert(block_number).construct_prefix_sets();
let input = TrieInputSorted::new(Arc::clone(&nodes), state, prefix_sets);
// Calculate the trie updates for this block, then apply those updates to the reverts.
// We calculate the overlay which will be passed into the next step using the trie
// reverts prior to them being updated.
let this_trie_updates =
Self::calculate_block_trie_updates(provider, block_number, input.clone())?;
Self::calculate_block_trie_updates(provider, block_number, input)?.into_sorted();
let trie_overlay = input.nodes.clone().into_sorted();
input.nodes.extend_ref(&this_trie_updates);
let this_trie_updates = this_trie_updates.into_sorted();
let trie_overlay = Arc::clone(&nodes);
let mut nodes_mut = Arc::unwrap_or_clone(nodes);
nodes_mut.extend_ref(&this_trie_updates);
nodes = Arc::new(nodes_mut);
// Write the changesets to the DB using the trie updates produced by the block, and the
// trie reverts as the overlay.
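The loop above keeps the accumulated trie-node overlay behind an `Arc` and grows it with `Arc::unwrap_or_clone`, so the inner value is reused when the overlay is no longer shared and cloned only once when it is. A minimal, self-contained sketch of that accumulation pattern (plain `std` types rather than the reth ones):

```rust
use std::sync::Arc;

// Sketch of the Arc accumulation used in the block loop above: take the shared
// overlay, reuse it if this is the only handle (or clone it once otherwise),
// extend it, and wrap it back into an Arc for the next iteration.
fn extend_shared(overlay: Arc<Vec<u64>>, updates: &[u64]) -> Arc<Vec<u64>> {
    let mut inner = Arc::unwrap_or_clone(overlay);
    inner.extend_from_slice(updates);
    Arc::new(inner)
}
```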

View File

@@ -21,8 +21,9 @@ use reth_trie::{
proof::{Proof, StorageProof},
updates::TrieUpdates,
witness::TrieWitness,
AccountProof, HashedPostState, HashedStorage, KeccakKeyHasher, MultiProof, MultiProofTargets,
StateRoot, StorageMultiProof, StorageRoot, TrieInput,
AccountProof, HashedPostState, HashedPostStateSorted, HashedStorage, KeccakKeyHasher,
MultiProof, MultiProofTargets, StateRoot, StorageMultiProof, StorageRoot, TrieInput,
TrieInputSorted,
};
use reth_trie_db::{
DatabaseHashedPostState, DatabaseHashedStorage, DatabaseProof, DatabaseStateRoot,
@@ -118,7 +119,7 @@ impl<'b, Provider: DBProvider + BlockNumReader> HistoricalStateProviderRef<'b, P
}
/// Retrieve revert hashed state for this history provider.
fn revert_state(&self) -> ProviderResult<HashedPostState> {
fn revert_state(&self) -> ProviderResult<HashedPostStateSorted> {
if !self.lowest_available_blocks.is_account_history_available(self.block_number) ||
!self.lowest_available_blocks.is_storage_history_available(self.block_number)
{
@@ -133,7 +134,8 @@ impl<'b, Provider: DBProvider + BlockNumReader> HistoricalStateProviderRef<'b, P
);
}
Ok(HashedPostState::from_reverts::<KeccakKeyHasher>(self.tx(), self.block_number..)?)
HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(self.tx(), self.block_number..)
.map_err(ProviderError::from)
}
/// Retrieve revert hashed storage for this history provider and target address.
@@ -287,14 +289,15 @@ impl<Provider: DBProvider + BlockNumReader> StateRootProvider
{
fn state_root(&self, hashed_state: HashedPostState) -> ProviderResult<B256> {
let mut revert_state = self.revert_state()?;
revert_state.extend(hashed_state);
StateRoot::overlay_root(self.tx(), revert_state)
let hashed_state_sorted = hashed_state.into_sorted();
revert_state.extend_ref(&hashed_state_sorted);
StateRoot::overlay_root(self.tx(), &revert_state)
.map_err(|err| ProviderError::Database(err.into()))
}
fn state_root_from_nodes(&self, mut input: TrieInput) -> ProviderResult<B256> {
input.prepend(self.revert_state()?);
StateRoot::overlay_root_from_nodes(self.tx(), input)
input.prepend(self.revert_state()?.into());
StateRoot::overlay_root_from_nodes(self.tx(), TrieInputSorted::from_unsorted(input))
.map_err(|err| ProviderError::Database(err.into()))
}
@@ -303,8 +306,9 @@ impl<Provider: DBProvider + BlockNumReader> StateRootProvider
hashed_state: HashedPostState,
) -> ProviderResult<(B256, TrieUpdates)> {
let mut revert_state = self.revert_state()?;
revert_state.extend(hashed_state);
StateRoot::overlay_root_with_updates(self.tx(), revert_state)
let hashed_state_sorted = hashed_state.into_sorted();
revert_state.extend_ref(&hashed_state_sorted);
StateRoot::overlay_root_with_updates(self.tx(), &revert_state)
.map_err(|err| ProviderError::Database(err.into()))
}
@@ -312,9 +316,12 @@ impl<Provider: DBProvider + BlockNumReader> StateRootProvider
&self,
mut input: TrieInput,
) -> ProviderResult<(B256, TrieUpdates)> {
input.prepend(self.revert_state()?);
StateRoot::overlay_root_from_nodes_with_updates(self.tx(), input)
.map_err(|err| ProviderError::Database(err.into()))
input.prepend(self.revert_state()?.into());
StateRoot::overlay_root_from_nodes_with_updates(
self.tx(),
TrieInputSorted::from_unsorted(input),
)
.map_err(|err| ProviderError::Database(err.into()))
}
}
@@ -367,7 +374,7 @@ impl<Provider: DBProvider + BlockNumReader> StateProofProvider
address: Address,
slots: &[B256],
) -> ProviderResult<AccountProof> {
input.prepend(self.revert_state()?);
input.prepend(self.revert_state()?.into());
let proof = <Proof<_, _> as DatabaseProof>::from_tx(self.tx());
proof.overlay_account_proof(input, address, slots).map_err(ProviderError::from)
}
@@ -377,13 +384,13 @@ impl<Provider: DBProvider + BlockNumReader> StateProofProvider
mut input: TrieInput,
targets: MultiProofTargets,
) -> ProviderResult<MultiProof> {
input.prepend(self.revert_state()?);
input.prepend(self.revert_state()?.into());
let proof = <Proof<_, _> as DatabaseProof>::from_tx(self.tx());
proof.overlay_multiproof(input, targets).map_err(ProviderError::from)
}
fn witness(&self, mut input: TrieInput, target: HashedPostState) -> ProviderResult<Vec<Bytes>> {
input.prepend(self.revert_state()?);
input.prepend(self.revert_state()?.into());
TrieWitness::overlay_witness(self.tx(), input, target)
.map_err(ProviderError::from)
.map(|hm| hm.into_values().collect())

View File

@@ -12,7 +12,7 @@ use reth_trie::{
updates::TrieUpdates,
witness::TrieWitness,
AccountProof, HashedPostState, HashedStorage, KeccakKeyHasher, MultiProof, MultiProofTargets,
StateRoot, StorageMultiProof, StorageRoot, TrieInput,
StateRoot, StorageMultiProof, StorageRoot, TrieInput, TrieInputSorted,
};
use reth_trie_db::{
DatabaseProof, DatabaseStateRoot, DatabaseStorageProof, DatabaseStorageRoot,
@@ -60,12 +60,12 @@ impl<Provider: BlockHashReader> BlockHashReader for LatestStateProviderRef<'_, P
impl<Provider: DBProvider + Sync> StateRootProvider for LatestStateProviderRef<'_, Provider> {
fn state_root(&self, hashed_state: HashedPostState) -> ProviderResult<B256> {
StateRoot::overlay_root(self.tx(), hashed_state)
StateRoot::overlay_root(self.tx(), &hashed_state.into_sorted())
.map_err(|err| ProviderError::Database(err.into()))
}
fn state_root_from_nodes(&self, input: TrieInput) -> ProviderResult<B256> {
StateRoot::overlay_root_from_nodes(self.tx(), input)
StateRoot::overlay_root_from_nodes(self.tx(), TrieInputSorted::from_unsorted(input))
.map_err(|err| ProviderError::Database(err.into()))
}
@@ -73,7 +73,7 @@ impl<Provider: DBProvider + Sync> StateRootProvider for LatestStateProviderRef<'
&self,
hashed_state: HashedPostState,
) -> ProviderResult<(B256, TrieUpdates)> {
StateRoot::overlay_root_with_updates(self.tx(), hashed_state)
StateRoot::overlay_root_with_updates(self.tx(), &hashed_state.into_sorted())
.map_err(|err| ProviderError::Database(err.into()))
}
@@ -81,8 +81,11 @@ impl<Provider: DBProvider + Sync> StateRootProvider for LatestStateProviderRef<'
&self,
input: TrieInput,
) -> ProviderResult<(B256, TrieUpdates)> {
StateRoot::overlay_root_from_nodes_with_updates(self.tx(), input)
.map_err(|err| ProviderError::Database(err.into()))
StateRoot::overlay_root_from_nodes_with_updates(
self.tx(),
TrieInputSorted::from_unsorted(input),
)
.map_err(|err| ProviderError::Database(err.into()))
}
}

View File

@@ -14,7 +14,7 @@ use reth_trie::{
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory},
trie_cursor::{InMemoryTrieCursorFactory, TrieCursorFactory},
updates::TrieUpdatesSorted,
HashedPostState, HashedPostStateSorted, KeccakKeyHasher,
HashedPostStateSorted, KeccakKeyHasher,
};
use reth_trie_db::{
DatabaseHashedCursorFactory, DatabaseHashedPostState, DatabaseTrieCursorFactory,
@@ -234,13 +234,10 @@ where
let _guard = debug_span!(target: "providers::state::overlay", "Retrieving hashed state reverts").entered();
let start = Instant::now();
// TODO(mediocregopher) make from_reverts return sorted
// https://github.com/paradigmxyz/reth/issues/19382
let res = HashedPostState::from_reverts::<KeccakKeyHasher>(
let res = HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(
provider.tx_ref(),
from_block + 1..,
)?
.into_sorted();
)?;
retrieve_hashed_state_reverts_duration = start.elapsed();
res
};

View File

@@ -917,7 +917,7 @@ mod tests {
assert_eq!(
StateRoot::overlay_root(
tx,
provider_factory.hashed_post_state(&state.bundle_state)
&provider_factory.hashed_post_state(&state.bundle_state).into_sorted()
)
.unwrap(),
state_root(expected.clone().into_iter().map(|(address, (account, storage))| (

View File

@@ -22,7 +22,8 @@ use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use revm_database::{AccountStatus, BundleAccount};
/// Representation of in-memory hashed state.
/// In-memory hashed state that stores account and storage changes with keccak256-hashed keys in
/// hash maps.
#[derive(PartialEq, Eq, Clone, Default, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct HashedPostState {
@@ -375,6 +376,21 @@ impl HashedPostState {
HashedPostStateSorted { accounts, storages }
}
/// Creates a sorted copy without consuming self.
/// More efficient than `.clone().into_sorted()` as it avoids cloning `HashMap` metadata.
pub fn clone_into_sorted(&self) -> HashedPostStateSorted {
let mut accounts: Vec<_> = self.accounts.iter().map(|(&k, &v)| (k, v)).collect();
accounts.sort_unstable_by_key(|(address, _)| *address);
let storages = self
.storages
.iter()
.map(|(&hashed_address, storage)| (hashed_address, storage.clone_into_sorted()))
.collect();
HashedPostStateSorted { accounts, storages }
}
/// Clears the account and storage maps of this `HashedPostState`.
pub fn clear(&mut self) {
self.accounts.clear();
@@ -467,6 +483,15 @@ impl HashedStorage {
HashedStorageSorted { storage_slots, wiped: self.wiped }
}
/// Creates a sorted copy without consuming self.
/// More efficient than `.clone().into_sorted()` as it avoids cloning `HashMap` metadata.
pub fn clone_into_sorted(&self) -> HashedStorageSorted {
let mut storage_slots: Vec<_> = self.storage.iter().map(|(&k, &v)| (k, v)).collect();
storage_slots.sort_unstable_by_key(|(key, _)| *key);
HashedStorageSorted { storage_slots, wiped: self.wiped }
}
}
/// Sorted hashed post state optimized for iterating during state trie calculation.
@@ -507,6 +532,46 @@ impl HashedPostStateSorted {
self.accounts.len() + self.storages.values().map(|s| s.len()).sum::<usize>()
}
/// Construct [`TriePrefixSetsMut`] from hashed post state.
///
/// The prefix sets contain the hashed account and storage keys that have been changed in the
/// post state.
pub fn construct_prefix_sets(&self) -> TriePrefixSetsMut {
let mut account_prefix_set = PrefixSetMut::with_capacity(self.accounts.len());
let mut destroyed_accounts = HashSet::default();
for (hashed_address, account) in &self.accounts {
account_prefix_set.insert(Nibbles::unpack(hashed_address));
if account.is_none() {
destroyed_accounts.insert(*hashed_address);
}
}
let mut storage_prefix_sets =
B256Map::with_capacity_and_hasher(self.storages.len(), Default::default());
for (hashed_address, hashed_storage) in &self.storages {
// Ensure account trie covers storage overlays even if account map is empty.
account_prefix_set.insert(Nibbles::unpack(hashed_address));
let prefix_set = if hashed_storage.wiped {
PrefixSetMut::all()
} else {
let mut prefix_set =
PrefixSetMut::with_capacity(hashed_storage.storage_slots.len());
prefix_set.extend_keys(
hashed_storage
.storage_slots
.iter()
.map(|(hashed_slot, _)| Nibbles::unpack(hashed_slot)),
);
prefix_set
};
storage_prefix_sets.insert(*hashed_address, prefix_set);
}
TriePrefixSetsMut { account_prefix_set, storage_prefix_sets, destroyed_accounts }
}
/// Extends this state with contents of another sorted state.
/// Entries in `other` take precedence for duplicate keys.
pub fn extend_ref(&mut self, other: &Self) {
@@ -1391,4 +1456,77 @@ mod tests {
);
}
}
#[test]
fn test_clone_into_sorted_equivalence() {
let addr1 = B256::from([1; 32]);
let addr2 = B256::from([2; 32]);
let addr3 = B256::from([3; 32]);
let slot1 = B256::from([1; 32]);
let slot2 = B256::from([2; 32]);
let slot3 = B256::from([3; 32]);
let state = HashedPostState {
accounts: B256Map::from_iter([
(addr1, Some(Account { nonce: 1, balance: U256::from(100), bytecode_hash: None })),
(addr2, None),
(addr3, Some(Account::default())),
]),
storages: B256Map::from_iter([
(
addr1,
HashedStorage {
wiped: false,
storage: B256Map::from_iter([
(slot1, U256::from(10)),
(slot2, U256::from(20)),
]),
},
),
(
addr2,
HashedStorage {
wiped: true,
storage: B256Map::from_iter([(slot3, U256::ZERO)]),
},
),
]),
};
// clone_into_sorted should produce the same result as clone().into_sorted()
let sorted_via_clone = state.clone().into_sorted();
let sorted_via_clone_into = state.clone_into_sorted();
assert_eq!(sorted_via_clone, sorted_via_clone_into);
// Verify the original state is not consumed
assert_eq!(state.accounts.len(), 3);
assert_eq!(state.storages.len(), 2);
}
#[test]
fn test_hashed_storage_clone_into_sorted_equivalence() {
let slot1 = B256::from([1; 32]);
let slot2 = B256::from([2; 32]);
let slot3 = B256::from([3; 32]);
let storage = HashedStorage {
wiped: true,
storage: B256Map::from_iter([
(slot1, U256::from(100)),
(slot2, U256::ZERO),
(slot3, U256::from(300)),
]),
};
// clone_into_sorted should produce the same result as clone().into_sorted()
let sorted_via_clone = storage.clone().into_sorted();
let sorted_via_clone_into = storage.clone_into_sorted();
assert_eq!(sorted_via_clone, sorted_via_clone_into);
// Verify the original storage is not consumed
assert_eq!(storage.storage.len(), 3);
assert!(storage.wiped);
}
}
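`clone_into_sorted` above collects straight from the map iterators into `Vec`s and sorts those, rather than cloning the `HashMap`-backed state and then consuming it. A generic sketch of the same idea (illustrative types only, not the reth ones):

```rust
use std::collections::HashMap;

// Sketch: build a sorted snapshot of a map without cloning the map itself,
// mirroring what clone_into_sorted does for accounts and storage slots above.
fn sorted_snapshot(map: &HashMap<u64, String>) -> Vec<(u64, String)> {
    let mut pairs: Vec<_> = map.iter().map(|(&k, v)| (k, v.clone())).collect();
    pairs.sort_unstable_by_key(|(k, _)| *k);
    pairs
}
```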

View File

@@ -1,4 +1,5 @@
use alloc::vec::Vec;
use core::cmp::Ordering;
/// Helper function to extend a sorted vector with another sorted vector.
/// Values from `other` take precedence for duplicate keys.
@@ -24,7 +25,6 @@ where
// Iterate through target and update/collect items from other
for target_item in target.iter_mut() {
while let Some(other_item) = other_iter.peek() {
use core::cmp::Ordering;
match other_item.0.cmp(&target_item.0) {
Ordering::Less => {
// Other item comes before current target item, collect it
@@ -51,3 +51,16 @@ where
target.sort_unstable_by(|a, b| a.0.cmp(&b.0));
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_extend_sorted_vec() {
let mut target = vec![(1, "a"), (3, "c")];
let other = vec![(2, "b"), (3, "c_new")];
extend_sorted_vec(&mut target, &other);
assert_eq!(target, vec![(1, "a"), (2, "b"), (3, "c_new")]);
}
}

View File

@@ -1,8 +1,5 @@
use crate::{DatabaseHashedCursorFactory, DatabaseTrieCursorFactory, PrefixSetLoader};
use alloy_primitives::{
map::{AddressMap, B256Map},
BlockNumber, B256, U256,
};
use alloy_primitives::{map::B256Map, BlockNumber, B256};
use reth_db_api::{
cursor::DbCursorRO,
models::{AccountBeforeTx, BlockNumberAddress, BlockNumberAddressRange},
@@ -13,11 +10,11 @@ use reth_db_api::{
use reth_execution_errors::StateRootError;
use reth_trie::{
hashed_cursor::HashedPostStateCursorFactory, trie_cursor::InMemoryTrieCursorFactory,
updates::TrieUpdates, HashedPostState, HashedStorage, KeccakKeyHasher, KeyHasher, StateRoot,
StateRootProgress, TrieInput,
updates::TrieUpdates, HashedPostStateSorted, HashedStorageSorted, KeccakKeyHasher, KeyHasher,
StateRoot, StateRootProgress, TrieInputSorted,
};
use std::{
collections::HashMap,
collections::HashSet,
ops::{RangeBounds, RangeInclusive},
};
use tracing::{debug, instrument};
@@ -73,7 +70,7 @@ pub trait DatabaseStateRoot<'a, TX>: Sized {
range: RangeInclusive<BlockNumber>,
) -> Result<StateRootProgress, StateRootError>;
/// Calculate the state root for this [`HashedPostState`].
/// Calculate the state root for this [`HashedPostStateSorted`].
/// Internally, this method retrieves prefixsets and uses them
/// to calculate incremental state root.
///
@@ -99,40 +96,43 @@ pub trait DatabaseStateRoot<'a, TX>: Sized {
///
/// // Calculate the state root
/// let tx = db.tx().expect("failed to create transaction");
/// let state_root = StateRoot::overlay_root(&tx, hashed_state);
/// let state_root = StateRoot::overlay_root(&tx, &hashed_state.into_sorted());
/// ```
///
/// # Returns
///
/// The state root for this [`HashedPostState`].
fn overlay_root(tx: &'a TX, post_state: HashedPostState) -> Result<B256, StateRootError>;
/// The state root for this [`HashedPostStateSorted`].
fn overlay_root(tx: &'a TX, post_state: &HashedPostStateSorted)
-> Result<B256, StateRootError>;
/// Calculates the state root for this [`HashedPostState`] and returns it alongside trie
/// Calculates the state root for this [`HashedPostStateSorted`] and returns it alongside trie
/// updates. See [`Self::overlay_root`] for more info.
fn overlay_root_with_updates(
tx: &'a TX,
post_state: HashedPostState,
post_state: &HashedPostStateSorted,
) -> Result<(B256, TrieUpdates), StateRootError>;
/// Calculates the state root for provided [`HashedPostState`] using cached intermediate nodes.
fn overlay_root_from_nodes(tx: &'a TX, input: TrieInput) -> Result<B256, StateRootError>;
/// Calculates the state root for provided [`HashedPostStateSorted`] using cached intermediate
/// nodes.
fn overlay_root_from_nodes(tx: &'a TX, input: TrieInputSorted) -> Result<B256, StateRootError>;
/// Calculates the state root and trie updates for provided [`HashedPostState`] using
/// Calculates the state root and trie updates for provided [`HashedPostStateSorted`] using
/// cached intermediate nodes.
fn overlay_root_from_nodes_with_updates(
tx: &'a TX,
input: TrieInput,
input: TrieInputSorted,
) -> Result<(B256, TrieUpdates), StateRootError>;
}
/// Extends [`HashedPostState`] with operations specific for working with a database transaction.
/// Extends [`HashedPostStateSorted`] with operations specific for working with a database
/// transaction.
pub trait DatabaseHashedPostState<TX>: Sized {
/// Initializes [`HashedPostState`] from reverts. Iterates over state reverts in the specified
/// range and aggregates them into hashed state in reverse.
/// Initializes [`HashedPostStateSorted`] from reverts. Iterates over state reverts in the
/// specified range and aggregates them into sorted hashed state.
fn from_reverts<KH: KeyHasher>(
tx: &TX,
range: impl RangeBounds<BlockNumber>,
) -> Result<Self, DatabaseError>;
) -> Result<HashedPostStateSorted, DatabaseError>;
}
impl<'a, TX: DbTx> DatabaseStateRoot<'a, TX>
@@ -174,12 +174,14 @@ impl<'a, TX: DbTx> DatabaseStateRoot<'a, TX>
Self::incremental_root_calculator(tx, range)?.root_with_progress()
}
fn overlay_root(tx: &'a TX, post_state: HashedPostState) -> Result<B256, StateRootError> {
fn overlay_root(
tx: &'a TX,
post_state: &HashedPostStateSorted,
) -> Result<B256, StateRootError> {
let prefix_sets = post_state.construct_prefix_sets().freeze();
let state_sorted = post_state.into_sorted();
StateRoot::new(
DatabaseTrieCursorFactory::new(tx),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), post_state),
)
.with_prefix_sets(prefix_sets)
.root()
@@ -187,24 +189,27 @@ impl<'a, TX: DbTx> DatabaseStateRoot<'a, TX>
fn overlay_root_with_updates(
tx: &'a TX,
post_state: HashedPostState,
post_state: &HashedPostStateSorted,
) -> Result<(B256, TrieUpdates), StateRootError> {
let prefix_sets = post_state.construct_prefix_sets().freeze();
let state_sorted = post_state.into_sorted();
StateRoot::new(
DatabaseTrieCursorFactory::new(tx),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), post_state),
)
.with_prefix_sets(prefix_sets)
.root_with_updates()
}
fn overlay_root_from_nodes(tx: &'a TX, input: TrieInput) -> Result<B256, StateRootError> {
let state_sorted = input.state.into_sorted();
let nodes_sorted = input.nodes.into_sorted();
fn overlay_root_from_nodes(tx: &'a TX, input: TrieInputSorted) -> Result<B256, StateRootError> {
StateRoot::new(
InMemoryTrieCursorFactory::new(DatabaseTrieCursorFactory::new(tx), &nodes_sorted),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted),
InMemoryTrieCursorFactory::new(
DatabaseTrieCursorFactory::new(tx),
input.nodes.as_ref(),
),
HashedPostStateCursorFactory::new(
DatabaseHashedCursorFactory::new(tx),
input.state.as_ref(),
),
)
.with_prefix_sets(input.prefix_sets.freeze())
.root()
@@ -212,77 +217,123 @@ impl<'a, TX: DbTx> DatabaseStateRoot<'a, TX>
fn overlay_root_from_nodes_with_updates(
tx: &'a TX,
input: TrieInput,
input: TrieInputSorted,
) -> Result<(B256, TrieUpdates), StateRootError> {
let state_sorted = input.state.into_sorted();
let nodes_sorted = input.nodes.into_sorted();
StateRoot::new(
InMemoryTrieCursorFactory::new(DatabaseTrieCursorFactory::new(tx), &nodes_sorted),
HashedPostStateCursorFactory::new(DatabaseHashedCursorFactory::new(tx), &state_sorted),
InMemoryTrieCursorFactory::new(
DatabaseTrieCursorFactory::new(tx),
input.nodes.as_ref(),
),
HashedPostStateCursorFactory::new(
DatabaseHashedCursorFactory::new(tx),
input.state.as_ref(),
),
)
.with_prefix_sets(input.prefix_sets.freeze())
.root_with_updates()
}
}
impl<TX: DbTx> DatabaseHashedPostState<TX> for HashedPostState {
impl<TX: DbTx> DatabaseHashedPostState<TX> for HashedPostStateSorted {
/// Builds a sorted hashed post-state from reverts.
///
/// Reads MDBX data directly into Vecs, using `HashSet`s only to track seen keys.
/// This avoids intermediate `HashMap` allocations since MDBX data is already sorted.
///
/// - Reads the first occurrence of each changed account/storage slot in the range.
/// - Hashes keys and returns them already ordered for trie iteration.
#[instrument(target = "trie::db", skip(tx), fields(range))]
fn from_reverts<KH: KeyHasher>(
tx: &TX,
range: impl RangeBounds<BlockNumber>,
) -> Result<Self, DatabaseError> {
// Iterate over account changesets and record value before first occurring account change.
let account_range = (range.start_bound(), range.end_bound()); // to avoid cloning
let mut accounts = HashMap::new();
// Read accounts directly into Vec with HashSet to track seen keys.
// Only keep the first (oldest) occurrence of each account.
let mut accounts = Vec::new();
let mut seen_accounts = HashSet::new();
let account_range = (range.start_bound(), range.end_bound());
let mut account_changesets_cursor = tx.cursor_read::<tables::AccountChangeSets>()?;
for entry in account_changesets_cursor.walk_range(account_range)? {
let (_, AccountBeforeTx { address, info }) = entry?;
accounts.entry(address).or_insert(info);
if seen_accounts.insert(address) {
accounts.push((KH::hash_key(address), info));
}
}
accounts.sort_unstable_by_key(|(hash, _)| *hash);
// Iterate over storage changesets and record value before first occurring storage change.
// Read storages directly into B256Map<Vec<_>> with HashSet to track seen keys.
// Only keep the first (oldest) occurrence of each (address, slot) pair.
let storage_range: BlockNumberAddressRange = range.into();
let mut storages = AddressMap::<B256Map<U256>>::default();
let mut storages = B256Map::<Vec<_>>::default();
let mut seen_storage_keys = HashSet::new();
let mut storage_changesets_cursor = tx.cursor_read::<tables::StorageChangeSets>()?;
for entry in storage_changesets_cursor.walk_range(storage_range)? {
let (BlockNumberAddress((_, address)), storage) = entry?;
let account_storage = storages.entry(address).or_default();
account_storage.entry(storage.key).or_insert(storage.value);
if seen_storage_keys.insert((address, storage.key)) {
let hashed_address = KH::hash_key(address);
storages
.entry(hashed_address)
.or_default()
.push((KH::hash_key(storage.key), storage.value));
}
}
let hashed_accounts =
accounts.into_iter().map(|(address, info)| (KH::hash_key(address), info)).collect();
// Sort storage slots and convert to HashedStorageSorted
let hashed_storages = storages
.into_iter()
.map(|(address, storage)| {
(
KH::hash_key(address),
HashedStorage::from_iter(
// The `wiped` flag indicates only whether previous storage entries
// should be looked up in db or not. For reverts it's a noop since all
// wiped changes had been written as storage reverts.
false,
storage.into_iter().map(|(slot, value)| (KH::hash_key(slot), value)),
),
)
.map(|(address, mut slots)| {
slots.sort_unstable_by_key(|(slot, _)| *slot);
(address, HashedStorageSorted { storage_slots: slots, wiped: false })
})
.collect();
Ok(Self { accounts: hashed_accounts, storages: hashed_storages })
Ok(Self::new(accounts, hashed_storages))
}
}
#[cfg(test)]
mod tests {
use super::*;
use alloy_primitives::{hex, map::HashMap, Address, U256};
use alloy_primitives::{hex, map::HashMap, Address, B256, U256};
use reth_db::test_utils::create_test_rw_db;
use reth_db_api::database::Database;
use reth_trie::KeccakKeyHasher;
use reth_db_api::{
database::Database,
models::{AccountBeforeTx, BlockNumberAddress},
tables,
transaction::DbTxMut,
};
use reth_primitives_traits::{Account, StorageEntry};
use reth_trie::{HashedPostState, HashedStorage, KeccakKeyHasher};
use revm::state::AccountInfo;
use revm_database::BundleState;
/// Overlay root calculation works with sorted state.
#[test]
fn overlay_root_with_sorted_state() {
let db = create_test_rw_db();
let tx = db.tx().expect("failed to create transaction");
let mut hashed_state = HashedPostState::default();
hashed_state.accounts.insert(
B256::from(U256::from(1)),
Some(Account { nonce: 1, balance: U256::from(10), bytecode_hash: None }),
);
hashed_state.accounts.insert(B256::from(U256::from(2)), None);
hashed_state.storages.insert(
B256::from(U256::from(1)),
HashedStorage::from_iter(false, [(B256::from(U256::from(3)), U256::from(30))]),
);
let sorted = hashed_state.into_sorted();
let overlay_root = StateRoot::overlay_root(&tx, &sorted).unwrap();
// Just verify it produces a valid root
assert!(!overlay_root.is_zero());
}
/// Builds hashed state from a bundle and checks the known state root.
#[test]
fn from_bundle_state_with_rayon() {
let address1 = Address::with_last_byte(1);
@@ -308,8 +359,102 @@ mod tests {
let db = create_test_rw_db();
let tx = db.tx().expect("failed to create transaction");
assert_eq!(
StateRoot::overlay_root(&tx, post_state).unwrap(),
StateRoot::overlay_root(&tx, &post_state.into_sorted()).unwrap(),
hex!("b464525710cafcf5d4044ac85b72c08b1e76231b8d91f288fe438cc41d8eaafd")
);
}
/// Verifies `from_reverts` keeps first occurrence per key and preserves ordering guarantees.
#[test]
fn from_reverts_keeps_first_occurrence_and_ordering() {
let db = create_test_rw_db();
let tx = db.tx_mut().expect("failed to create rw tx");
let address1 = Address::with_last_byte(1);
let address2 = Address::with_last_byte(2);
let slot1 = B256::from(U256::from(11));
let slot2 = B256::from(U256::from(22));
// Account changesets: only first occurrence per address should be kept.
tx.put::<tables::AccountChangeSets>(
1,
AccountBeforeTx {
address: address1,
info: Some(Account { nonce: 1, ..Default::default() }),
},
)
.unwrap();
tx.put::<tables::AccountChangeSets>(
2,
AccountBeforeTx {
address: address1,
info: Some(Account { nonce: 2, ..Default::default() }),
},
)
.unwrap();
tx.put::<tables::AccountChangeSets>(3, AccountBeforeTx { address: address2, info: None })
.unwrap();
// Storage changesets: only first occurrence per slot should be kept, and slots sorted.
tx.put::<tables::StorageChangeSets>(
BlockNumberAddress((1, address1)),
StorageEntry { key: slot2, value: U256::from(200) },
)
.unwrap();
tx.put::<tables::StorageChangeSets>(
BlockNumberAddress((2, address1)),
StorageEntry { key: slot1, value: U256::from(100) },
)
.unwrap();
tx.put::<tables::StorageChangeSets>(
BlockNumberAddress((3, address1)),
StorageEntry { key: slot1, value: U256::from(999) }, // should be ignored
)
.unwrap();
tx.commit().unwrap();
let tx = db.tx().expect("failed to create ro tx");
let sorted = HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(&tx, 1..=3).unwrap();
// Verify first occurrences were kept (nonce 1, not 2)
assert_eq!(sorted.accounts.len(), 2);
let hashed_addr1 = KeccakKeyHasher::hash_key(address1);
let account1 = sorted.accounts.iter().find(|(addr, _)| *addr == hashed_addr1).unwrap();
assert_eq!(account1.1.unwrap().nonce, 1);
// Ordering guarantees - accounts sorted by hashed address
assert!(sorted.accounts.windows(2).all(|w| w[0].0 <= w[1].0));
// Ordering guarantees - storage slots sorted by hashed slot
for storage in sorted.storages.values() {
assert!(storage.storage_slots.windows(2).all(|w| w[0].0 <= w[1].0));
}
}
/// Empty block range returns empty state.
#[test]
fn from_reverts_empty_range() {
let db = create_test_rw_db();
// Insert data outside the query range
db.update(|tx| {
tx.put::<tables::AccountChangeSets>(
100,
AccountBeforeTx {
address: Address::with_last_byte(1),
info: Some(Account { nonce: 1, ..Default::default() }),
},
)
.unwrap();
})
.unwrap();
let tx = db.tx().unwrap();
// Query a range with no data
let sorted = HashedPostStateSorted::from_reverts::<KeccakKeyHasher>(&tx, 1..=10).unwrap();
assert!(sorted.accounts.is_empty());
assert!(sorted.storages.is_empty());
}
}
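The `from_reverts` implementation in this file keeps only the first (oldest) occurrence of each key while walking the changesets in block order, then sorts once by hashed key. A small self-contained sketch of that dedup-then-sort shape (plain types, not the reth cursors or key hashers):

```rust
use std::collections::HashSet;

// Sketch: walk (block, key, value) entries in block order, keep the oldest value
// per key, then sort once by key -- the same shape as the changeset walk above.
fn first_occurrence_sorted(entries: &[(u64, [u8; 32], u64)]) -> Vec<([u8; 32], u64)> {
    let mut seen = HashSet::new();
    let mut out = Vec::new();
    for &(_block, key, value) in entries {
        // Entries arrive ordered by block number, so the first hit is the oldest value.
        if seen.insert(key) {
            out.push((key, value));
        }
    }
    // A single sort at the end replaces the intermediate HashMap the old code built.
    out.sort_unstable_by_key(|(key, _)| *key);
    out
}
```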

View File

@@ -310,9 +310,11 @@ fn run_case(
// Compute and check the post state root
let hashed_state =
HashedPostState::from_bundle_state::<KeccakKeyHasher>(output.state.state());
let (computed_state_root, _) =
StateRoot::overlay_root_with_updates(provider.tx_ref(), hashed_state.clone())
.map_err(|err| Error::block_failed(block_number, program_inputs.clone(), err))?;
let (computed_state_root, _) = StateRoot::overlay_root_with_updates(
provider.tx_ref(),
&hashed_state.clone_into_sorted(),
)
.map_err(|err| Error::block_failed(block_number, program_inputs.clone(), err))?;
if computed_state_root != block.state_root {
return Err(Error::block_failed(
block_number,