From b0f22be5ae32b7ebac45d71b3794e3a7850a7bae Mon Sep 17 00:00:00 2001 From: Roman Krasiuk Date: Mon, 24 Apr 2023 20:25:48 +0300 Subject: [PATCH] feat(trie): hashed cursor (#2288) --- crates/storage/provider/Cargo.toml | 1 + crates/storage/provider/src/post_state.rs | 257 ++++++- crates/trie/src/hashed_cursor/default.rs | 50 ++ crates/trie/src/hashed_cursor/mod.rs | 47 ++ crates/trie/src/hashed_cursor/post_state.rs | 764 ++++++++++++++++++++ crates/trie/src/lib.rs | 3 + crates/trie/src/trie.rs | 129 +++- 7 files changed, 1218 insertions(+), 33 deletions(-) create mode 100644 crates/trie/src/hashed_cursor/default.rs create mode 100644 crates/trie/src/hashed_cursor/mod.rs create mode 100644 crates/trie/src/hashed_cursor/post_state.rs diff --git a/crates/storage/provider/Cargo.toml b/crates/storage/provider/Cargo.toml index 7843730637..41f14e098b 100644 --- a/crates/storage/provider/Cargo.toml +++ b/crates/storage/provider/Cargo.toml @@ -34,6 +34,7 @@ parking_lot = { version = "0.12", optional = true } reth-db = { path = "../db", features = ["test-utils"] } reth-primitives = { path = "../../primitives", features = ["arbitrary", "test-utils"] } reth-rlp = { path = "../../rlp" } +reth-trie = { path = "../../trie", features = ["test-utils"] } parking_lot = "0.12" [features] diff --git a/crates/storage/provider/src/post_state.rs b/crates/storage/provider/src/post_state.rs index a71aaf3448..651ae38f2c 100644 --- a/crates/storage/provider/src/post_state.rs +++ b/crates/storage/provider/src/post_state.rs @@ -7,8 +7,12 @@ use reth_db::{ Error as DbError, }; use reth_primitives::{ - bloom::logs_bloom, proofs::calculate_receipt_root_ref, Account, Address, BlockNumber, Bloom, - Bytecode, Log, Receipt, StorageEntry, H256, U256, + bloom::logs_bloom, keccak256, proofs::calculate_receipt_root_ref, Account, Address, + BlockNumber, Bloom, Bytecode, Log, Receipt, StorageEntry, H256, U256, +}; +use reth_trie::{ + hashed_cursor::{HashedPostState, HashedPostStateCursorFactory, HashedStorage}, + StateRoot, StateRootError, }; use std::collections::BTreeMap; @@ -177,6 +181,77 @@ impl PostState { calculate_receipt_root_ref(self.receipts().iter().map(Into::into)) } + /// Hash all changed accounts and storage entries that are currently stored in the post state. + /// + /// # Returns + /// + /// The hashed post state. + pub fn hash_state_slow(&self) -> HashedPostState { + let mut accounts = BTreeMap::default(); + for (address, account) in self.accounts() { + accounts.insert(keccak256(address), *account); + } + + let mut storages = BTreeMap::default(); + for (address, storage) in self.storage() { + let mut hashed_storage = BTreeMap::default(); + for (slot, value) in &storage.storage { + hashed_storage.insert(keccak256(H256(slot.to_be_bytes())), *value); + } + storages.insert( + keccak256(address), + HashedStorage { wiped: storage.wiped, storage: hashed_storage }, + ); + } + + HashedPostState { accounts, storages } + } + + /// Calculate the state root for this [PostState]. + /// Internally, function calls [Self::hash_state_slow] to obtain the [HashedPostState]. + /// Afterwards, it retrieves the prefixsets from the [HashedPostState] and uses them to + /// calculate the incremental state root. + /// + /// # Example + /// + /// ``` + /// use reth_primitives::{Address, Account}; + /// use reth_provider::PostState; + /// use reth_db::{mdbx::{EnvKind, WriteMap, test_utils::create_test_db}, database::Database}; + /// + /// // Initialize the database + /// let db = create_test_db::(EnvKind::RW); + /// + /// // Initialize the post state + /// let mut post_state = PostState::new(); + /// + /// // Create an account + /// let block_number = 1; + /// let address = Address::random(); + /// post_state.create_account(1, address, Account { nonce: 1, ..Default::default() }); + /// + /// // Calculate the state root + /// let tx = db.tx().expect("failed to create transaction"); + /// let state_root = post_state.state_root_slow(&tx); + /// ``` + /// + /// # Returns + /// + /// The state root for this [PostState]. + pub fn state_root_slow<'a, 'tx, TX: DbTx<'tx>>( + &self, + tx: &'a TX, + ) -> Result { + let hashed_post_state = self.hash_state_slow(); + let (account_prefix_set, storage_prefix_set) = hashed_post_state.construct_prefix_sets(); + let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, &hashed_post_state); + StateRoot::new(tx) + .with_hashed_cursor_factory(&hashed_cursor_factory) + .with_changed_account_prefixes(account_prefix_set) + .with_changed_storage_prefixes(storage_prefix_set) + .root() + } + // todo: note overwrite behavior, i.e. changes in `other` take precedent /// Extend this [PostState] with the changes in another [PostState]. pub fn extend(&mut self, mut other: PostState) { @@ -486,6 +561,8 @@ mod tests { mdbx::{test_utils, Env, EnvKind, WriteMap}, transaction::DbTx, }; + use reth_primitives::proofs::EMPTY_ROOT; + use reth_trie::test_utils::state_root; use std::sync::Arc; // Ensure that the transition id is not incremented if postate is extended by another empty @@ -1083,4 +1160,180 @@ mod tests { "The latest state of the storage is incorrect in the merged state" ); } + + #[test] + fn empty_post_state_state_root() { + let db: Arc> = test_utils::create_test_db(EnvKind::RW); + let tx = db.tx().unwrap(); + + let post_state = PostState::new(); + let state_root = post_state.state_root_slow(&tx).expect("Could not get state root"); + assert_eq!(state_root, EMPTY_ROOT); + } + + #[test] + fn post_state_state_root() { + let mut state: BTreeMap)> = (0..10) + .into_iter() + .map(|key| { + let account = Account { nonce: 1, balance: U256::from(key), bytecode_hash: None }; + let storage = (0..10) + .into_iter() + .map(|key| (H256::from_low_u64_be(key), U256::from(key))) + .collect(); + (Address::from_low_u64_be(key), (account, storage)) + }) + .collect(); + + let db: Arc> = test_utils::create_test_db(EnvKind::RW); + + // insert initial state to the database + db.update(|tx| { + for (address, (account, storage)) in state.iter() { + let hashed_address = keccak256(&address); + tx.put::(hashed_address, *account).unwrap(); + for (slot, value) in storage { + tx.put::( + hashed_address, + StorageEntry { key: keccak256(slot), value: *value }, + ) + .unwrap(); + } + } + + let (_, updates) = StateRoot::new(tx).root_with_updates().unwrap(); + updates.flush(tx).unwrap(); + }) + .unwrap(); + + let block_number = 1; + let tx = db.tx().unwrap(); + let mut post_state = PostState::new(); + + // database only state root is correct + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // destroy account 1 + let address_1 = Address::from_low_u64_be(1); + let account_1_old = state.remove(&address_1).unwrap(); + post_state.destroy_account(block_number, address_1, account_1_old.0); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // change slot 2 in account 2 + let address_2 = Address::from_low_u64_be(2); + let slot_2 = U256::from(2); + let slot_2_key = H256(slot_2.to_be_bytes()); + let address_2_slot_2_old_value = + state.get(&address_2).unwrap().1.get(&slot_2_key).unwrap().clone(); + let address_2_slot_2_new_value = U256::from(100); + state.get_mut(&address_2).unwrap().1.insert(slot_2_key, address_2_slot_2_new_value); + post_state.change_storage( + block_number, + address_2, + BTreeMap::from([(slot_2, (address_2_slot_2_old_value, address_2_slot_2_new_value))]), + ); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // change balance of account 3 + let address_3 = Address::from_low_u64_be(3); + let address_3_account_old = state.get(&address_3).unwrap().0; + let address_3_account_new = + Account { balance: U256::from(24), ..address_3_account_old.clone() }; + state.get_mut(&address_3).unwrap().0.balance = address_3_account_new.balance; + post_state.change_account( + block_number, + address_3, + address_3_account_old, + address_3_account_new, + ); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // change nonce of account 4 + let address_4 = Address::from_low_u64_be(4); + let address_4_account_old = state.get(&address_4).unwrap().0; + let address_4_account_new = Account { nonce: 128, ..address_4_account_old.clone() }; + state.get_mut(&address_4).unwrap().0.nonce = address_4_account_new.nonce; + post_state.change_account( + block_number, + address_4, + address_4_account_old, + address_4_account_new, + ); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // recreate account 1 + let account_1_new = + Account { nonce: 56, balance: U256::from(123), bytecode_hash: Some(H256::random()) }; + state.insert(address_1, (account_1_new, BTreeMap::default())); + post_state.create_account(block_number, address_1, account_1_new); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + + // update storage for account 1 + let slot_20 = U256::from(20); + let slot_20_key = H256(slot_20.to_be_bytes()); + let account_1_slot_20_value = U256::from(12345); + state.get_mut(&address_1).unwrap().1.insert(slot_20_key, account_1_slot_20_value); + post_state.change_storage( + block_number, + address_1, + BTreeMap::from([(slot_20, (U256::from(0), account_1_slot_20_value))]), + ); + assert_eq!( + post_state.state_root_slow(&tx).unwrap(), + state_root( + state + .clone() + .into_iter() + .map(|(address, (account, storage))| (address, (account, storage.into_iter()))) + ) + ); + } } diff --git a/crates/trie/src/hashed_cursor/default.rs b/crates/trie/src/hashed_cursor/default.rs new file mode 100644 index 0000000000..e90ac10210 --- /dev/null +++ b/crates/trie/src/hashed_cursor/default.rs @@ -0,0 +1,50 @@ +use super::{HashedAccountCursor, HashedCursorFactory, HashedStorageCursor}; +use reth_db::{ + cursor::{DbCursorRO, DbDupCursorRO}, + tables, + transaction::{DbTx, DbTxGAT}, +}; +use reth_primitives::{Account, StorageEntry, H256}; + +impl<'a, 'tx, TX: DbTx<'tx>> HashedCursorFactory<'a> for TX { + type AccountCursor = >::Cursor where Self: 'a; + type StorageCursor = >::DupCursor where Self: 'a; + + fn hashed_account_cursor(&'a self) -> Result { + self.cursor_read::() + } + + fn hashed_storage_cursor(&'a self) -> Result { + self.cursor_dup_read::() + } +} + +impl<'tx, C> HashedAccountCursor for C +where + C: DbCursorRO<'tx, tables::HashedAccount>, +{ + fn seek(&mut self, key: H256) -> Result, reth_db::Error> { + self.seek(key) + } + + fn next(&mut self) -> Result, reth_db::Error> { + self.next() + } +} + +impl<'tx, C> HashedStorageCursor for C +where + C: DbCursorRO<'tx, tables::HashedStorage> + DbDupCursorRO<'tx, tables::HashedStorage>, +{ + fn is_empty(&mut self, key: H256) -> Result { + Ok(self.seek_exact(key)?.is_none()) + } + + fn seek(&mut self, key: H256, subkey: H256) -> Result, reth_db::Error> { + self.seek_by_key_subkey(key, subkey) + } + + fn next(&mut self) -> Result, reth_db::Error> { + self.next_dup_val() + } +} diff --git a/crates/trie/src/hashed_cursor/mod.rs b/crates/trie/src/hashed_cursor/mod.rs new file mode 100644 index 0000000000..7f1592f1b4 --- /dev/null +++ b/crates/trie/src/hashed_cursor/mod.rs @@ -0,0 +1,47 @@ +use reth_primitives::{Account, StorageEntry, H256}; + +/// Default implementation of the hashed state cursor traits. +mod default; + +/// Implementation of hashed state cursor traits for the post state. +mod post_state; +pub use post_state::*; + +/// The factory trait for creating cursors over the hashed state. +pub trait HashedCursorFactory<'a> { + /// The hashed account cursor type. + type AccountCursor: HashedAccountCursor + where + Self: 'a; + /// The hashed storage cursor type. + type StorageCursor: HashedStorageCursor + where + Self: 'a; + + /// Returns a cursor for iterating over all hashed accounts in the state. + fn hashed_account_cursor(&'a self) -> Result; + + /// Returns a cursor for iterating over all hashed storage entries in the state. + fn hashed_storage_cursor(&'a self) -> Result; +} + +/// The cursor for iterating over hashed accounts. +pub trait HashedAccountCursor { + /// Seek an entry greater or equal to the given key and position the cursor there. + fn seek(&mut self, key: H256) -> Result, reth_db::Error>; + + /// Move the cursor to the next entry and return it. + fn next(&mut self) -> Result, reth_db::Error>; +} + +/// The cursor for iterating over hashed storage entries. +pub trait HashedStorageCursor { + /// Returns `true` if there are no entries for a given key. + fn is_empty(&mut self, key: H256) -> Result; + + /// Seek an entry greater or equal to the given key/subkey and position the cursor there. + fn seek(&mut self, key: H256, subkey: H256) -> Result, reth_db::Error>; + + /// Move the cursor to the next entry and return it. + fn next(&mut self) -> Result, reth_db::Error>; +} diff --git a/crates/trie/src/hashed_cursor/post_state.rs b/crates/trie/src/hashed_cursor/post_state.rs new file mode 100644 index 0000000000..c82f67b36c --- /dev/null +++ b/crates/trie/src/hashed_cursor/post_state.rs @@ -0,0 +1,764 @@ +use crate::{prefix_set::PrefixSet, Nibbles}; + +use super::{HashedAccountCursor, HashedCursorFactory, HashedStorageCursor}; +use reth_db::{ + cursor::{DbCursorRO, DbDupCursorRO}, + tables, + transaction::{DbTx, DbTxGAT}, +}; +use reth_primitives::{Account, StorageEntry, H256, U256}; +use std::collections::{BTreeMap, HashMap}; + +/// The post state account storage with hashed slots. +#[derive(Debug, Default, Clone, Eq, PartialEq)] +pub struct HashedStorage { + /// Whether the storage was wiped or not. + pub wiped: bool, + /// Hashed storage slots. + pub storage: BTreeMap, +} + +/// The post state with hashed addresses as keys. +#[derive(Debug, Default, Clone, Eq, PartialEq)] +pub struct HashedPostState { + /// Map of hashed addresses to account info. + pub accounts: BTreeMap>, + /// Map of hashed addresses to hashed storage. + pub storages: BTreeMap, +} + +impl HashedPostState { + /// Construct prefix sets from hashed post state. + pub fn construct_prefix_sets(&self) -> (PrefixSet, HashMap) { + // Initialize prefix sets. + let mut account_prefix_set = PrefixSet::default(); + let mut storage_prefix_set: HashMap = HashMap::default(); + + for hashed_address in self.accounts.keys() { + account_prefix_set.insert(Nibbles::unpack(hashed_address)); + } + + for (hashed_address, hashed_storage) in self.storages.iter() { + account_prefix_set.insert(Nibbles::unpack(hashed_address)); + for hashed_slot in hashed_storage.storage.keys() { + storage_prefix_set + .entry(*hashed_address) + .or_default() + .insert(Nibbles::unpack(hashed_slot)); + } + } + + (account_prefix_set, storage_prefix_set) + } +} + +/// The hashed cursor factory for the post state. +pub struct HashedPostStateCursorFactory<'a, 'b, TX> { + tx: &'a TX, + post_state: &'b HashedPostState, +} + +impl<'a, 'b, TX> HashedPostStateCursorFactory<'a, 'b, TX> { + /// Create a new factory. + pub fn new(tx: &'a TX, post_state: &'b HashedPostState) -> Self { + Self { tx, post_state } + } +} + +impl<'a, 'b, 'tx, TX: DbTx<'tx>> HashedCursorFactory<'a> + for HashedPostStateCursorFactory<'a, 'b, TX> +where + 'a: 'b, +{ + type AccountCursor = HashedPostStateAccountCursor<'b, >::Cursor> where Self: 'a ; + type StorageCursor = HashedPostStateStorageCursor<'b, >::DupCursor> where Self: 'a; + + fn hashed_account_cursor(&'a self) -> Result { + let cursor = self.tx.cursor_read::()?; + Ok(HashedPostStateAccountCursor { post_state: self.post_state, cursor, last_account: None }) + } + + fn hashed_storage_cursor(&'a self) -> Result { + let cursor = self.tx.cursor_dup_read::()?; + Ok(HashedPostStateStorageCursor { + post_state: self.post_state, + cursor, + account: None, + last_slot: None, + }) + } +} + +/// The cursor to iterate over post state hashed accounts and corresponding database entries. +/// It will always give precedence to the data from the post state. +#[derive(Debug, Clone)] +pub struct HashedPostStateAccountCursor<'b, C> { + cursor: C, + post_state: &'b HashedPostState, + last_account: Option, +} + +impl<'b, 'tx, C> HashedPostStateAccountCursor<'b, C> +where + C: DbCursorRO<'tx, tables::HashedAccount>, +{ + fn was_account_cleared(&self, account: &H256) -> bool { + matches!(self.post_state.accounts.get(account), Some(None)) + } + + fn next_account( + &self, + post_state_item: Option<(H256, Account)>, + db_item: Option<(H256, Account)>, + ) -> Result, reth_db::Error> { + let result = match (post_state_item, db_item) { + // If both are not empty, return the smallest of the two + // Post state is given precedence if keys are equal + (Some((post_state_address, post_state_account)), Some((db_address, db_account))) => { + if post_state_address <= db_address { + Some((post_state_address, post_state_account)) + } else { + Some((db_address, db_account)) + } + } + // If the database is empty, return the post state entry + (Some((post_state_address, post_state_account)), None) => { + Some((post_state_address, post_state_account)) + } + // If the post state is empty, return the database entry + (None, Some((db_address, db_account))) => Some((db_address, db_account)), + // If both are empty, return None + (None, None) => None, + }; + Ok(result) + } +} + +impl<'b, 'tx, C> HashedAccountCursor for HashedPostStateAccountCursor<'b, C> +where + C: DbCursorRO<'tx, tables::HashedAccount>, +{ + fn seek(&mut self, key: H256) -> Result, reth_db::Error> { + self.last_account = None; + + // Attempt to find the account in poststate. + let post_state_item = self + .post_state + .accounts + .iter() + .find_map(|(k, v)| v.filter(|_| k >= &key).map(|v| (*k, v))); + if let Some((address, account)) = post_state_item { + // It's an exact match, return the account from post state without looking up in the + // database. + if address == key { + self.last_account = Some(address); + return Ok(Some((address, account))) + } + } + + // It's not an exact match, reposition to the first greater or equal account that wasn't + // cleared. + let mut db_item = self.cursor.seek(key)?; + while db_item + .as_ref() + .map(|(address, _)| self.was_account_cleared(address)) + .unwrap_or_default() + { + db_item = self.cursor.next()?; + } + + let result = self.next_account(post_state_item, db_item)?; + self.last_account = result.as_ref().map(|(address, _)| *address); + Ok(result) + } + + fn next(&mut self) -> Result, reth_db::Error> { + let last_account = match self.last_account.as_ref() { + Some(account) => account, + None => return Ok(None), // no previous entry was found + }; + + // If post state was given precedence, move the cursor forward. + let mut db_item = self.cursor.current()?; + while db_item + .as_ref() + .map(|(address, _)| address <= last_account || self.was_account_cleared(address)) + .unwrap_or_default() + { + db_item = self.cursor.next()?; + } + + let post_state_item = self + .post_state + .accounts + .iter() + .find(|(k, v)| k > &last_account && v.is_some()) + .map(|(address, info)| (*address, info.unwrap())); + let result = self.next_account(post_state_item, db_item)?; + self.last_account = result.as_ref().map(|(address, _)| *address); + Ok(result) + } +} + +/// The cursor to iterate over post state hashed storages and corresponding database entries. +/// It will always give precedence to the data from the post state. +#[derive(Debug, Clone)] +pub struct HashedPostStateStorageCursor<'b, C> { + post_state: &'b HashedPostState, + cursor: C, + account: Option, + last_slot: Option, +} + +impl<'b, C> HashedPostStateStorageCursor<'b, C> { + fn was_storage_wiped(&self, account: &H256) -> bool { + match self.post_state.storages.get(account) { + Some(storage) => storage.wiped, + None => false, + } + } + + fn next_slot( + &self, + post_state_item: Option<(&H256, &U256)>, + db_item: Option, + ) -> Result, reth_db::Error> { + let result = match (post_state_item, db_item) { + // If both are not empty, return the smallest of the two + // Post state is given precedence if keys are equal + (Some((post_state_slot, post_state_value)), Some(db_entry)) => { + if post_state_slot <= &db_entry.key { + Some(StorageEntry { key: *post_state_slot, value: *post_state_value }) + } else { + Some(db_entry) + } + } + // If the database is empty, return the post state entry + (Some((post_state_slot, post_state_value)), None) => { + Some(StorageEntry { key: *post_state_slot, value: *post_state_value }) + } + // If the post state is empty, return the database entry + (None, Some(db_entry)) => Some(db_entry), + // If both are empty, return None + (None, None) => None, + }; + Ok(result) + } +} + +impl<'b, 'tx, C> HashedStorageCursor for HashedPostStateStorageCursor<'b, C> +where + C: DbCursorRO<'tx, tables::HashedStorage> + DbDupCursorRO<'tx, tables::HashedStorage>, +{ + fn is_empty(&mut self, key: H256) -> Result { + let is_empty = match self.post_state.storages.get(&key) { + Some(storage) => storage.wiped && storage.storage.is_empty(), + None => self.cursor.seek_exact(key)?.is_none(), + }; + Ok(is_empty) + } + + fn seek(&mut self, key: H256, subkey: H256) -> Result, reth_db::Error> { + self.last_slot = None; + self.account = Some(key); + + // Attempt to find the account's storage in poststate. + let post_state_item = self + .post_state + .storages + .get(&key) + .map(|storage| storage.storage.iter().skip_while(|(slot, _)| slot <= &&subkey)) + .and_then(|mut iter| iter.next()); + if let Some((slot, value)) = post_state_item { + // It's an exact match, return the storage slot from post state without looking up in + // the database. + if slot == &subkey { + self.last_slot = Some(*slot); + return Ok(Some(StorageEntry { key: *slot, value: *value })) + } + } + + // It's not an exact match, reposition to the first greater or equal account. + let db_item = if self.was_storage_wiped(&key) { + None + } else { + self.cursor.seek_by_key_subkey(key, subkey)? + }; + + let result = self.next_slot(post_state_item, db_item)?; + self.last_slot = result.as_ref().map(|entry| entry.key); + Ok(result) + } + + fn next(&mut self) -> Result, reth_db::Error> { + let account = self.account.expect("`seek` must be called first"); + + let last_slot = match self.last_slot.as_ref() { + Some(account) => account, + None => return Ok(None), // no previous entry was found + }; + + let db_item = if self.was_storage_wiped(&account) { + None + } else { + // If post state was given precedence, move the cursor forward. + let mut db_item = self.cursor.seek_by_key_subkey(account, *last_slot)?; + + // If the entry was already returned, move to the next. + if db_item.as_ref().map(|entry| &entry.key == last_slot).unwrap_or_default() { + db_item = self.cursor.next_dup_val()?; + } + + db_item + }; + + let post_state_item = self + .post_state + .storages + .get(&account) + .map(|storage| storage.storage.iter().skip_while(|(slot, _)| slot <= &last_slot)) + .and_then(|mut iter| iter.next()); + let result = self.next_slot(post_state_item, db_item)?; + self.last_slot = result.as_ref().map(|entry| entry.key); + Ok(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use proptest::prelude::*; + use reth_db::{database::Database, mdbx::test_utils::create_test_rw_db, transaction::DbTxMut}; + + fn assert_account_cursor_order<'a, 'b>( + factory: &'a impl HashedCursorFactory<'b>, + mut expected: impl Iterator, + ) where + 'a: 'b, + { + let mut cursor = factory.hashed_account_cursor().unwrap(); + + let first_account = cursor.seek(H256::default()).unwrap(); + assert_eq!(first_account, expected.next()); + + while let Some(expected) = expected.next() { + let next_cursor_account = cursor.next().unwrap(); + assert_eq!(next_cursor_account, Some(expected)); + } + + assert!(cursor.next().unwrap().is_none()); + } + + fn assert_storage_cursor_order<'a, 'b>( + factory: &'a impl HashedCursorFactory<'b>, + expected: impl Iterator)>, + ) where + 'a: 'b, + { + let mut cursor = factory.hashed_storage_cursor().unwrap(); + + for (account, storage) in expected { + let mut expected_storage = storage.into_iter(); + + let first_storage = cursor.seek(account, H256::default()).unwrap(); + assert_eq!(first_storage.map(|e| (e.key, e.value)), expected_storage.next()); + + while let Some(expected_entry) = expected_storage.next() { + let next_cursor_storage = cursor.next().unwrap(); + assert_eq!(next_cursor_storage.map(|e| (e.key, e.value)), Some(expected_entry)); + } + + assert!(cursor.next().unwrap().is_none()); + } + } + + #[test] + fn post_state_only_accounts() { + let accounts = Vec::from_iter( + (1..11).into_iter().map(|key| (H256::from_low_u64_be(key), Account::default())), + ); + let post_state = HashedPostState { + accounts: BTreeMap::from_iter( + accounts.iter().map(|(key, account)| (*key, Some(*account))), + ), + storages: Default::default(), + }; + + let db = create_test_rw_db(); + let tx = db.tx().unwrap(); + + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_account_cursor_order(&factory, accounts.into_iter()); + } + + #[test] + fn db_only_accounts() { + let accounts = Vec::from_iter( + (1..11).into_iter().map(|key| (H256::from_low_u64_be(key), Account::default())), + ); + + let db = create_test_rw_db(); + db.update(|tx| { + for (key, account) in accounts.iter() { + tx.put::(*key, *account).unwrap(); + } + }) + .unwrap(); + + let tx = db.tx().unwrap(); + let post_state = HashedPostState::default(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_account_cursor_order(&factory, accounts.into_iter()); + } + + #[test] + fn account_cursor_correct_order() { + // odd keys are in post state, even keys are in db + let accounts = Vec::from_iter( + (1..111).into_iter().map(|key| (H256::from_low_u64_be(key), Account::default())), + ); + + let db = create_test_rw_db(); + db.update(|tx| { + for (key, account) in accounts.iter().filter(|x| x.0.to_low_u64_be() % 2 == 0) { + tx.put::(*key, *account).unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: BTreeMap::from_iter( + accounts + .iter() + .filter(|x| x.0.to_low_u64_be() % 2 != 0) + .map(|(key, account)| (*key, Some(*account))), + ), + storages: Default::default(), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_account_cursor_order(&factory, accounts.into_iter()); + } + + #[test] + fn removed_accounts_are_omitted() { + // odd keys are in post state, even keys are in db + let accounts = Vec::from_iter( + (1..111).into_iter().map(|key| (H256::from_low_u64_be(key), Account::default())), + ); + // accounts 5, 9, 11 should be considered removed from post state + let removed_keys = Vec::from_iter([5, 9, 11].into_iter().map(H256::from_low_u64_be)); + + let db = create_test_rw_db(); + db.update(|tx| { + for (key, account) in accounts.iter().filter(|x| x.0.to_low_u64_be() % 2 == 0) { + tx.put::(*key, *account).unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: BTreeMap::from_iter( + accounts.iter().filter(|x| x.0.to_low_u64_be() % 2 != 0).map(|(key, account)| { + (*key, if removed_keys.contains(key) { None } else { Some(*account) }) + }), + ), + storages: Default::default(), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); + assert_account_cursor_order(&factory, expected); + } + + #[test] + fn post_state_accounts_take_precedence() { + let accounts = + Vec::from_iter((1..10).into_iter().map(|key| { + (H256::from_low_u64_be(key), Account { nonce: key, ..Default::default() }) + })); + + let db = create_test_rw_db(); + db.update(|tx| { + for (key, _) in accounts.iter() { + // insert zero value accounts to the database + tx.put::(*key, Account::default()).unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: BTreeMap::from_iter( + accounts.iter().map(|(key, account)| (*key, Some(*account))), + ), + storages: Default::default(), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_account_cursor_order(&factory, accounts.into_iter()); + } + + #[test] + fn fuzz_hashed_account_cursor() { + proptest!(ProptestConfig::with_cases(10), |(db_accounts: BTreeMap, post_state_accounts: BTreeMap>)| { + let db = create_test_rw_db(); + db.update(|tx| { + for (key, account) in db_accounts.iter() { + tx.put::(*key, *account).unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: BTreeMap::from_iter( + post_state_accounts.iter().map(|(key, account)| (*key, *account)), + ), + storages: Default::default(), + }; + + let mut expected = db_accounts; + // overwrite or remove accounts from the expected result + for (key, account) in post_state_accounts.iter() { + if let Some(account) = account { + expected.insert(*key, *account); + } else { + expected.remove(key); + } + } + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_account_cursor_order(&factory, expected.into_iter()); + } + ); + } + + #[test] + fn storage_is_empty() { + let address = H256::random(); + let db = create_test_rw_db(); + + // empty from the get go + { + let post_state = HashedPostState::default(); + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let mut cursor = factory.hashed_storage_cursor().unwrap(); + assert!(cursor.is_empty(address).unwrap()); + } + + let db_storage = BTreeMap::from_iter( + (0..10).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + db.update(|tx| { + for (slot, value) in db_storage.iter() { + // insert zero value accounts to the database + tx.put::( + address, + StorageEntry { key: *slot, value: *value }, + ) + .unwrap(); + } + }) + .unwrap(); + + // not empty + { + let post_state = HashedPostState::default(); + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let mut cursor = factory.hashed_storage_cursor().unwrap(); + assert!(!cursor.is_empty(address).unwrap()); + } + + // wiped storage, must be empty + { + let post_state = HashedPostState { + accounts: BTreeMap::default(), + storages: BTreeMap::from_iter([( + address, + HashedStorage { wiped: true, ..Default::default() }, + )]), + }; + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let mut cursor = factory.hashed_storage_cursor().unwrap(); + assert!(cursor.is_empty(address).unwrap()); + } + + // wiped storage, but post state has entries + { + let post_state = HashedPostState { + accounts: BTreeMap::default(), + storages: BTreeMap::from_iter([( + address, + HashedStorage { + wiped: true, + storage: BTreeMap::from_iter([(H256::random(), U256::ZERO)]), + }, + )]), + }; + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let mut cursor = factory.hashed_storage_cursor().unwrap(); + assert!(!cursor.is_empty(address).unwrap()); + } + } + + #[test] + fn storage_cursor_correct_order() { + let address = H256::random(); + let db_storage = BTreeMap::from_iter( + (0..10).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + let post_state_storage = BTreeMap::from_iter( + (10..20).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + + let db = create_test_rw_db(); + db.update(|tx| { + for (slot, value) in db_storage.iter() { + // insert zero value accounts to the database + tx.put::( + address, + StorageEntry { key: *slot, value: *value }, + ) + .unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: Default::default(), + storages: BTreeMap::from([( + address, + HashedStorage { wiped: false, storage: post_state_storage.clone() }, + )]), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let expected = + [(address, db_storage.into_iter().chain(post_state_storage.into_iter()).collect())] + .into_iter(); + assert_storage_cursor_order(&factory, expected); + } + + #[test] + fn wiped_storage_is_discarded() { + let address = H256::random(); + let db_storage = BTreeMap::from_iter( + (0..10).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + let post_state_storage = BTreeMap::from_iter( + (10..20).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + + let db = create_test_rw_db(); + db.update(|tx| { + for (slot, _) in db_storage { + // insert zero value accounts to the database + tx.put::( + address, + StorageEntry { key: slot, value: U256::ZERO }, + ) + .unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: Default::default(), + storages: BTreeMap::from([( + address, + HashedStorage { wiped: true, storage: post_state_storage.clone() }, + )]), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let expected = [(address, post_state_storage)].into_iter(); + assert_storage_cursor_order(&factory, expected); + } + + #[test] + fn post_state_storages_take_precedence() { + let address = H256::random(); + let storage = BTreeMap::from_iter( + (1..10).into_iter().map(|key| (H256::from_low_u64_be(key), U256::from(key))), + ); + + let db = create_test_rw_db(); + db.update(|tx| { + for (slot, _) in storage.iter() { + // insert zero value accounts to the database + tx.put::( + address, + StorageEntry { key: *slot, value: U256::ZERO }, + ) + .unwrap(); + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: Default::default(), + storages: BTreeMap::from([( + address, + HashedStorage { wiped: false, storage: storage.clone() }, + )]), + }; + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let expected = [(address, storage)].into_iter(); + assert_storage_cursor_order(&factory, expected); + } + + #[test] + fn fuzz_hashed_storage_cursor() { + proptest!(ProptestConfig::with_cases(10), + |( + db_storages: BTreeMap>, + post_state_storages: BTreeMap)> + )| + { + let db = create_test_rw_db(); + db.update(|tx| { + for (address, storage) in db_storages.iter() { + for (slot, value) in storage { + let entry = StorageEntry { key: *slot, value: *value }; + tx.put::(*address, entry).unwrap(); + } + } + }) + .unwrap(); + + let post_state = HashedPostState { + accounts: Default::default(), + storages: BTreeMap::from_iter(post_state_storages.iter().map( + |(address, (wiped, storage))| { + (*address, HashedStorage { wiped: *wiped, storage: storage.clone() }) + }, + )), + }; + + let mut expected = db_storages; + // overwrite or remove accounts from the expected result + for (key, (wiped, storage)) in post_state_storages { + let entry = expected.entry(key).or_default(); + if wiped { + entry.clear(); + } + entry.extend(storage); + } + + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + assert_storage_cursor_order(&factory, expected.into_iter()); + }); + } +} diff --git a/crates/trie/src/lib.rs b/crates/trie/src/lib.rs index 6fa597ce7f..36093771fd 100644 --- a/crates/trie/src/lib.rs +++ b/crates/trie/src/lib.rs @@ -28,6 +28,9 @@ pub mod prefix_set; /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor; +/// The cursor implementations for navigating hashed state. +pub mod hashed_cursor; + /// The trie walker for iterating over the trie nodes. pub mod walker; diff --git a/crates/trie/src/trie.rs b/crates/trie/src/trie.rs index 1b80091f2f..9596a46fef 100644 --- a/crates/trie/src/trie.rs +++ b/crates/trie/src/trie.rs @@ -1,6 +1,7 @@ use crate::{ account::EthAccount, hash_builder::HashBuilder, + hashed_cursor::{HashedAccountCursor, HashedCursorFactory, HashedStorageCursor}, nibbles::Nibbles, prefix_set::{PrefixSet, PrefixSetLoader}, progress::{IntermediateStateRootState, StateRootProgress}, @@ -9,19 +10,17 @@ use crate::{ walker::TrieWalker, StateRootError, StorageRootError, }; -use reth_db::{ - cursor::{DbCursorRO, DbDupCursorRO}, - tables, - transaction::DbTx, -}; +use reth_db::{tables, transaction::DbTx}; use reth_primitives::{keccak256, proofs::EMPTY_ROOT, Address, BlockNumber, StorageEntry, H256}; use reth_rlp::Encodable; use std::{collections::HashMap, ops::RangeInclusive}; /// StateRoot is used to compute the root node of a state trie. -pub struct StateRoot<'a, TX> { +pub struct StateRoot<'a, 'b, TX, H> { /// A reference to the database transaction. pub tx: &'a TX, + /// The factory for hashed cursors. + pub hashed_cursor_factory: &'b H, /// A set of account prefixes that have changed. pub changed_account_prefixes: PrefixSet, /// A map containing storage changes with the hashed address as key and a set of storage key @@ -33,18 +32,7 @@ pub struct StateRoot<'a, TX> { threshold: u64, } -impl<'a, TX> StateRoot<'a, TX> { - /// Create a new [StateRoot] instance. - pub fn new(tx: &'a TX) -> Self { - Self { - tx, - changed_account_prefixes: PrefixSet::default(), - changed_storage_prefixes: HashMap::default(), - previous_state: None, - threshold: 100_000, - } - } - +impl<'a, 'b, TX, H> StateRoot<'a, 'b, TX, H> { /// Set the changed account prefixes. pub fn with_changed_account_prefixes(mut self, prefixes: PrefixSet) -> Self { self.changed_account_prefixes = prefixes; @@ -74,9 +62,39 @@ impl<'a, TX> StateRoot<'a, TX> { self.previous_state = state; self } + + /// Set the hashed cursor factory. + pub fn with_hashed_cursor_factory<'c, HF>( + self, + hashed_cursor_factory: &'c HF, + ) -> StateRoot<'a, 'c, TX, HF> { + StateRoot { + tx: self.tx, + changed_account_prefixes: self.changed_account_prefixes, + changed_storage_prefixes: self.changed_storage_prefixes, + threshold: self.threshold, + previous_state: self.previous_state, + hashed_cursor_factory, + } + } } -impl<'a, 'tx, TX: DbTx<'tx>> StateRoot<'a, TX> { +impl<'a, 'tx, TX> StateRoot<'a, 'a, TX, TX> +where + TX: DbTx<'tx> + HashedCursorFactory<'a>, +{ + /// Create a new [StateRoot] instance. + pub fn new(tx: &'a TX) -> Self { + Self { + tx, + changed_account_prefixes: PrefixSet::default(), + changed_storage_prefixes: HashMap::default(), + previous_state: None, + threshold: 100_000, + hashed_cursor_factory: tx, + } + } + /// Given a block number range, identifies all the accounts and storage keys that /// have changed. /// @@ -136,7 +154,13 @@ impl<'a, 'tx, TX: DbTx<'tx>> StateRoot<'a, TX> { tracing::debug!(target: "loader", "incremental state root with progress"); Self::incremental_root_calculator(tx, range)?.root_with_progress() } +} +impl<'a, 'b, 'tx, TX, H> StateRoot<'a, 'b, TX, H> +where + TX: DbTx<'tx>, + H: HashedCursorFactory<'b>, +{ /// Walks the intermediate nodes of existing state trie (if any) and hashed entries. Feeds the /// nodes into the hash builder. Collects the updates in the process. /// @@ -179,7 +203,7 @@ impl<'a, 'tx, TX: DbTx<'tx>> StateRoot<'a, TX> { tracing::debug!(target: "loader", "calculating state root"); let mut trie_updates = TrieUpdates::default(); - let mut hashed_account_cursor = self.tx.cursor_read::()?; + let mut hashed_account_cursor = self.hashed_cursor_factory.hashed_account_cursor()?; let mut trie_cursor = AccountTrieCursor::new(self.tx.cursor_read::()?); @@ -248,6 +272,7 @@ impl<'a, 'tx, TX: DbTx<'tx>> StateRoot<'a, TX> { // TODO: We can consider introducing the TrieProgress::Progress/Complete // abstraction inside StorageRoot, but let's give it a try as-is for now. let storage_root_calculator = StorageRoot::new_hashed(self.tx, hashed_address) + .with_hashed_cursor_factory(self.hashed_cursor_factory) .with_changed_prefixes( self.changed_storage_prefixes .get(&hashed_address) @@ -307,16 +332,21 @@ impl<'a, 'tx, TX: DbTx<'tx>> StateRoot<'a, TX> { } /// StorageRoot is used to compute the root node of an account storage trie. -pub struct StorageRoot<'a, TX> { +pub struct StorageRoot<'a, 'b, TX, H> { /// A reference to the database transaction. pub tx: &'a TX, + /// The factory for hashed cursors. + pub hashed_cursor_factory: &'b H, /// The hashed address of an account. pub hashed_address: H256, /// The set of storage slot prefixes that have changed. pub changed_prefixes: PrefixSet, } -impl<'a, TX> StorageRoot<'a, TX> { +impl<'a, 'tx, TX> StorageRoot<'a, 'a, TX, TX> +where + TX: DbTx<'tx> + HashedCursorFactory<'a>, +{ /// Creates a new storage root calculator given an raw address. pub fn new(tx: &'a TX, address: Address) -> Self { Self::new_hashed(tx, keccak256(address)) @@ -324,7 +354,28 @@ impl<'a, TX> StorageRoot<'a, TX> { /// Creates a new storage root calculator given a hashed address. pub fn new_hashed(tx: &'a TX, hashed_address: H256) -> Self { - Self { tx, hashed_address, changed_prefixes: PrefixSet::default() } + Self { + tx, + hashed_address, + changed_prefixes: PrefixSet::default(), + hashed_cursor_factory: tx, + } + } +} + +impl<'a, 'b, TX, H> StorageRoot<'a, 'b, TX, H> { + /// Creates a new storage root calculator given an raw address. + pub fn new_with_factory(tx: &'a TX, hashed_cursor_factory: &'b H, address: Address) -> Self { + Self::new_hashed_with_factory(tx, hashed_cursor_factory, keccak256(address)) + } + + /// Creates a new storage root calculator given a hashed address. + pub fn new_hashed_with_factory( + tx: &'a TX, + hashed_cursor_factory: &'b H, + hashed_address: H256, + ) -> Self { + Self { tx, hashed_address, changed_prefixes: PrefixSet::default(), hashed_cursor_factory } } /// Set the changed prefixes. @@ -332,9 +383,26 @@ impl<'a, TX> StorageRoot<'a, TX> { self.changed_prefixes = prefixes; self } + + /// Set the hashed cursor factory. + pub fn with_hashed_cursor_factory<'c, HF>( + self, + hashed_cursor_factory: &'c HF, + ) -> StorageRoot<'a, 'c, TX, HF> { + StorageRoot { + tx: self.tx, + hashed_address: self.hashed_address, + changed_prefixes: self.changed_prefixes, + hashed_cursor_factory, + } + } } -impl<'a, 'tx, TX: DbTx<'tx>> StorageRoot<'a, TX> { +impl<'a, 'b, 'tx, TX, H> StorageRoot<'a, 'b, TX, H> +where + TX: DbTx<'tx>, + H: HashedCursorFactory<'b>, +{ /// Walks the hashed storage table entries for a given address and calculates the storage root. /// /// # Returns @@ -357,15 +425,15 @@ impl<'a, 'tx, TX: DbTx<'tx>> StorageRoot<'a, TX> { fn calculate(&self, retain_updates: bool) -> Result<(H256, TrieUpdates), StorageRootError> { tracing::debug!(target: "trie::storage_root", hashed_address = ?self.hashed_address, "calculating storage root"); - let mut hashed_storage_cursor = self.tx.cursor_dup_read::()?; + let mut hashed_storage_cursor = self.hashed_cursor_factory.hashed_storage_cursor()?; let mut trie_cursor = StorageTrieCursor::new( self.tx.cursor_dup_read::()?, self.hashed_address, ); - // do not add a branch node on empty storage - if hashed_storage_cursor.seek_exact(self.hashed_address)?.is_none() { + // short circuit on empty storage + if hashed_storage_cursor.is_empty(self.hashed_address)? { return Ok(( EMPTY_ROOT, TrieUpdates::from([(TrieKey::StorageTrie(self.hashed_address), TrieOp::Delete)]), @@ -388,8 +456,7 @@ impl<'a, 'tx, TX: DbTx<'tx>> StorageRoot<'a, TX> { }; let next_key = walker.advance()?; - let mut storage = - hashed_storage_cursor.seek_by_key_subkey(self.hashed_address, seek_key)?; + let mut storage = hashed_storage_cursor.seek(self.hashed_address, seek_key)?; while let Some(StorageEntry { key: hashed_key, value }) = storage { let storage_key_nibbles = Nibbles::unpack(hashed_key); if let Some(ref key) = next_key { @@ -399,7 +466,7 @@ impl<'a, 'tx, TX: DbTx<'tx>> StorageRoot<'a, TX> { } hash_builder .add_leaf(storage_key_nibbles, reth_rlp::encode_fixed_size(&value).as_ref()); - storage = hashed_storage_cursor.next_dup_val()?; + storage = hashed_storage_cursor.next()?; } } @@ -425,7 +492,7 @@ mod tests { }; use proptest::{prelude::ProptestConfig, proptest}; use reth_db::{ - cursor::DbCursorRW, + cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO}, mdbx::{test_utils::create_test_rw_db, Env, WriteMap}, tables, transaction::DbTxMut,