diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs new file mode 100644 index 0000000000..1f14a462b1 --- /dev/null +++ b/crates/trie/trie/src/forward_cursor.rs @@ -0,0 +1,51 @@ +/// The implementation of forward-only in memory cursor over the entries. +/// The cursor operates under the assumption that the supplied collection is pre-sorted. +#[derive(Debug)] +pub struct ForwardInMemoryCursor<'a, K, V> { + /// The reference to the pre-sorted collection of entries. + entries: &'a Vec<(K, V)>, + /// The index where cursor is currently positioned. + index: usize, +} + +impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { + /// Create new forward cursor positioned at the beginning of the collection. + /// The cursor expects all of the entries have been sorted in advance. + pub const fn new(entries: &'a Vec<(K, V)>) -> Self { + Self { entries, index: 0 } + } + + /// Returns `true` if the cursor is empty, regardless of its position. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> +where + K: PartialOrd + Copy, + V: Copy, +{ + /// Advances the cursor forward while `comparator` returns `true` or until the collection is + /// exhausted. Returns the first entry for which `comparator` returns `false` or `None`. + fn advance_while_false(&mut self, comparator: impl Fn(&K) -> bool) -> Option<(K, V)> { + let mut entry = self.entries.get(self.index); + while entry.map_or(false, |entry| comparator(&entry.0)) { + self.index += 1; + entry = self.entries.get(self.index); + } + entry.copied() + } + + /// Returns the first entry from the current cursor position that's greater or equal to the + /// provided key. This method advances the cursor forward. + pub fn seek(&mut self, key: &K) -> Option<(K, V)> { + self.advance_while_false(|k| k < key) + } + + /// Returns the first entry from the current cursor position that's greater than the provided + /// key. 
This method advances the cursor forward. + pub fn first_after(&mut self, key: &K) -> Option<(K, V)> { + self.advance_while_false(|k| k <= key) + } +} diff --git a/crates/trie/trie/src/hashed_cursor/post_state.rs b/crates/trie/trie/src/hashed_cursor/post_state.rs index b609048faf..ac262f3d44 100644 --- a/crates/trie/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/trie/src/hashed_cursor/post_state.rs @@ -1,6 +1,11 @@ use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::{HashedAccountsSorted, HashedPostStateSorted, HashedStorageSorted}; +use crate::{ + forward_cursor::ForwardInMemoryCursor, HashedAccountsSorted, HashedPostStateSorted, + HashedStorageSorted, +}; +use reth_db::DatabaseError; use reth_primitives::{Account, B256, U256}; +use std::collections::HashSet; /// The hashed cursor factory for the post state. #[derive(Debug, Clone)] @@ -20,7 +25,7 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF type AccountCursor = HashedPostStateAccountCursor<'a, CF::AccountCursor>; type StorageCursor = HashedPostStateStorageCursor<'a, CF::StorageCursor>; - fn hashed_account_cursor(&self) -> Result { + fn hashed_account_cursor(&self) -> Result { let cursor = self.cursor_factory.hashed_account_cursor()?; Ok(HashedPostStateAccountCursor::new(cursor, &self.post_state.accounts)) } @@ -28,7 +33,7 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF fn hashed_storage_cursor( &self, hashed_address: B256, - ) -> Result { + ) -> Result { let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?; Ok(HashedPostStateStorageCursor::new(cursor, self.post_state.storages.get(&hashed_address))) } @@ -36,23 +41,28 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF /// The cursor to iterate over post state hashed accounts and corresponding database entries. /// It will always give precedence to the data from the hashed post state. 
-#[derive(Debug, Clone)] +#[derive(Debug)] pub struct HashedPostStateAccountCursor<'a, C> { /// The database cursor. cursor: C, - /// The reference to the in-memory [`HashedAccountsSorted`]. - post_state_accounts: &'a HashedAccountsSorted, - /// The post state account index where the cursor is currently at. - post_state_account_index: usize, + /// Forward-only in-memory cursor over accounts. + post_state_cursor: ForwardInMemoryCursor<'a, B256, Account>, + /// Reference to the collection of account keys that were destroyed. + destroyed_accounts: &'a HashSet, /// The last hashed account that was returned by the cursor. /// De facto, this is a current cursor position. last_account: Option, } -impl<'a, C> HashedPostStateAccountCursor<'a, C> { +impl<'a, C> HashedPostStateAccountCursor<'a, C> +where + C: HashedCursor, +{ /// Create new instance of [`HashedPostStateAccountCursor`]. pub const fn new(cursor: C, post_state_accounts: &'a HashedAccountsSorted) -> Self { - Self { cursor, post_state_accounts, last_account: None, post_state_account_index: 0 } + let post_state_cursor = ForwardInMemoryCursor::new(&post_state_accounts.accounts); + let destroyed_accounts = &post_state_accounts.destroyed_accounts; + Self { cursor, post_state_cursor, destroyed_accounts, last_account: None } } /// Returns `true` if the account has been destroyed. @@ -61,29 +71,62 @@ impl<'a, C> HashedPostStateAccountCursor<'a, C> { /// This function only checks the post state, not the database, because the latter does not /// store destroyed accounts. fn is_account_cleared(&self, account: &B256) -> bool { - self.post_state_accounts.destroyed_accounts.contains(account) + self.destroyed_accounts.contains(account) + } + + fn seek_inner(&mut self, key: B256) -> Result, DatabaseError> { + // Take the next account from the post state with the key greater than or equal to the + // sought key. 
+ let post_state_entry = self.post_state_cursor.seek(&key); + + // It's an exact match, return the account from post state without looking up in the + // database. + if post_state_entry.map_or(false, |entry| entry.0 == key) { + return Ok(post_state_entry) + } + + // It's not an exact match, reposition to the first greater or equal account that wasn't + // cleared. + let mut db_entry = self.cursor.seek(key)?; + while db_entry.as_ref().map_or(false, |(address, _)| self.is_account_cleared(address)) { + db_entry = self.cursor.next()?; + } + + // Compare two entries and return the lowest. + Ok(Self::compare_entries(post_state_entry, db_entry)) + } + + fn next_inner(&mut self, last_account: B256) -> Result, DatabaseError> { + // Take the next account from the post state with the key greater than the last sought key. + let post_state_entry = self.post_state_cursor.first_after(&last_account); + + // If post state was given precedence or account was cleared, move the cursor forward. + let mut db_entry = self.cursor.seek(last_account)?; + while db_entry.as_ref().map_or(false, |(address, _)| { + address <= &last_account || self.is_account_cleared(address) + }) { + db_entry = self.cursor.next()?; + } + + // Compare two entries and return the lowest. + Ok(Self::compare_entries(post_state_entry, db_entry)) } /// Return the account with the lowest hashed account key. /// /// Given the next post state and database entries, return the smallest of the two. /// If the account keys are the same, the post state entry is given precedence. 
- fn next_account( - post_state_item: Option<&(B256, Account)>, + fn compare_entries( + post_state_item: Option<(B256, Account)>, db_item: Option<(B256, Account)>, ) -> Option<(B256, Account)> { - match (post_state_item, db_item) { + if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) { // If both are not empty, return the smallest of the two // Post state is given precedence if keys are equal - (Some((post_state_address, post_state_account)), Some((db_address, db_account))) => { - if post_state_address <= &db_address { - Some((*post_state_address, *post_state_account)) - } else { - Some((db_address, db_account)) - } - } + Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry }) + } else { // Return either non-empty entry - _ => post_state_item.copied().or(db_item), + db_item.or(post_state_item) } } } @@ -102,42 +145,11 @@ where /// /// The returned account key is memoized and the cursor remains positioned at that key until /// [`HashedCursor::seek`] or [`HashedCursor::next`] are called. - fn seek(&mut self, key: B256) -> Result, reth_db::DatabaseError> { - self.last_account = None; - - // Take the next account from the post state with the key greater than or equal to the - // sought key. - let mut post_state_entry = - self.post_state_accounts.accounts.get(self.post_state_account_index); - while post_state_entry.map(|(k, _)| k < &key).unwrap_or_default() { - self.post_state_account_index += 1; - post_state_entry = self.post_state_accounts.accounts.get(self.post_state_account_index); - } - - // It's an exact match, return the account from post state without looking up in the - // database. - if let Some((address, account)) = post_state_entry { - if address == &key { - self.last_account = Some(*address); - return Ok(Some((*address, *account))) - } - } - - // It's not an exact match, reposition to the first greater or equal account that wasn't - // cleared. 
- let mut db_entry = self.cursor.seek(key)?; - while db_entry - .as_ref() - .map(|(address, _)| self.is_account_cleared(address)) - .unwrap_or_default() - { - db_entry = self.cursor.next()?; - } - - // Compare two entries and return the lowest. - let result = Self::next_account(post_state_entry, db_entry); - self.last_account = result.as_ref().map(|(address, _)| *address); - Ok(result) + fn seek(&mut self, key: B256) -> Result, DatabaseError> { + // Find the closest account. + let entry = self.seek_inner(key)?; + self.last_account = entry.as_ref().map(|entry| entry.0); + Ok(entry) } /// Retrieve the next entry from the cursor. @@ -147,100 +159,118 @@ where /// /// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been /// called. - fn next(&mut self) -> Result, reth_db::DatabaseError> { - let last_account = match self.last_account.as_ref() { - Some(account) => account, - None => return Ok(None), // no previous entry was found + fn next(&mut self) -> Result, DatabaseError> { + let next = match self.last_account { + Some(account) => { + let entry = self.next_inner(account)?; + self.last_account = entry.as_ref().map(|entry| entry.0); + entry + } + // no previous entry was found + None => None, }; - - // If post state was given precedence, move the cursor forward. - let mut db_entry = self.cursor.seek(*last_account)?; - while db_entry - .as_ref() - .map(|(address, _)| address <= last_account || self.is_account_cleared(address)) - .unwrap_or_default() - { - db_entry = self.cursor.next()?; - } - - // Take the next account from the post state with the key greater than the last sought key. 
- let mut post_state_entry = - self.post_state_accounts.accounts.get(self.post_state_account_index); - while post_state_entry.map(|(k, _)| k <= last_account).unwrap_or_default() { - self.post_state_account_index += 1; - post_state_entry = self.post_state_accounts.accounts.get(self.post_state_account_index); - } - - // Compare two entries and return the lowest. - let result = Self::next_account(post_state_entry, db_entry); - self.last_account = result.as_ref().map(|(address, _)| *address); - Ok(result) + Ok(next) } } /// The cursor to iterate over post state hashed storages and corresponding database entries. /// It will always give precedence to the data from the post state. -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct HashedPostStateStorageCursor<'a, C> { /// The database cursor. cursor: C, - /// The reference to post state storage. - post_state_storage: Option<&'a HashedStorageSorted>, - /// The post state index where the cursor is currently at. - post_state_storage_index: usize, + /// Forward-only in-memory cursor over non zero-valued account storage slots. + post_state_cursor: Option>, + /// Reference to the collection of storage slot keys that were cleared. + cleared_slots: Option<&'a HashSet>, + /// Flag indicating whether database storage was wiped. + storage_wiped: bool, /// The last slot that has been returned by the cursor. /// De facto, this is the cursor's position for the given account key. last_slot: Option, } -impl<'a, C> HashedPostStateStorageCursor<'a, C> { +impl<'a, C> HashedPostStateStorageCursor<'a, C> +where + C: HashedStorageCursor, +{ /// Create new instance of [`HashedPostStateStorageCursor`] for the given hashed address. 
- pub const fn new(cursor: C, post_state: Option<&'a HashedStorageSorted>) -> Self { - Self { - cursor, - post_state_storage: post_state, - last_slot: None, - post_state_storage_index: 0, - } - } - - /// Returns `true` if the storage for the given - /// The database is not checked since it already has no wiped storage entries. - const fn is_db_storage_wiped(&self) -> bool { - match self.post_state_storage { - Some(storage) => storage.wiped, - None => false, - } + pub fn new(cursor: C, post_state_storage: Option<&'a HashedStorageSorted>) -> Self { + let post_state_cursor = + post_state_storage.map(|s| ForwardInMemoryCursor::new(&s.non_zero_valued_slots)); + let cleared_slots = post_state_storage.map(|s| &s.zero_valued_slots); + let storage_wiped = post_state_storage.map_or(false, |s| s.wiped); + Self { cursor, post_state_cursor, cleared_slots, storage_wiped, last_slot: None } } /// Check if the slot was zeroed out in the post state. /// The database is not checked since it already has no zero-valued slots. fn is_slot_zero_valued(&self, slot: &B256) -> bool { - self.post_state_storage - .map(|storage| storage.zero_valued_slots.contains(slot)) - .unwrap_or_default() + self.cleared_slots.map_or(false, |s| s.contains(slot)) + } + + /// Find the storage entry in post state or database that's greater or equal to provided subkey. + fn seek_inner(&mut self, subkey: B256) -> Result, DatabaseError> { + // Attempt to find the account's storage in post state. + let post_state_entry = self.post_state_cursor.as_mut().and_then(|c| c.seek(&subkey)); + + // If database storage was wiped or it's an exact match, + // return the storage slot from post state without looking up in the database. + if self.storage_wiped || post_state_entry.map_or(false, |entry| entry.0 == subkey) { + return Ok(post_state_entry) + } + + // It's not an exact match and storage was not wiped, + // reposition to the first greater or equal storage slot. 
+ let mut db_entry = self.cursor.seek(subkey)?; + while db_entry.as_ref().map_or(false, |entry| self.is_slot_zero_valued(&entry.0)) { + db_entry = self.cursor.next()?; + } + + // Compare two entries and return the lowest. + Ok(Self::compare_entries(post_state_entry, db_entry)) + } + + /// Find the storage entry that is right after current cursor position. + fn next_inner(&mut self, last_slot: B256) -> Result, DatabaseError> { + // Attempt to find the account's storage in post state. + let post_state_entry = + self.post_state_cursor.as_mut().and_then(|c| c.first_after(&last_slot)); + + // Return post state entry immediately if database was wiped. + if self.storage_wiped { + return Ok(post_state_entry) + } + + // If post state was given precedence, move the cursor forward. + // If the entry was already returned or is zero-valued, move to the next. + let mut db_entry = self.cursor.seek(last_slot)?; + while db_entry + .as_ref() + .map_or(false, |entry| entry.0 == last_slot || self.is_slot_zero_valued(&entry.0)) + { + db_entry = self.cursor.next()?; + } + + // Compare two entries and return the lowest. + Ok(Self::compare_entries(post_state_entry, db_entry)) } /// Return the storage entry with the lowest hashed storage key (hashed slot). /// /// Given the next post state and database entries, return the smallest of the two. /// If the storage keys are the same, the post state entry is given precedence. 
- fn next_slot( - post_state_item: Option<&(B256, U256)>, + fn compare_entries( + post_state_item: Option<(B256, U256)>, db_item: Option<(B256, U256)>, ) -> Option<(B256, U256)> { - match (post_state_item, db_item) { + if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) { // If both are not empty, return the smallest of the two // Post state is given precedence if keys are equal - (Some((post_state_slot, post_state_value)), Some((db_slot, db_value))) => { - if post_state_slot <= &db_slot { - Some((*post_state_slot, *post_state_value)) - } else { - Some((db_slot, db_value)) - } - } + Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry }) + } else { // Return either non-empty entry - _ => db_item.or_else(|| post_state_item.copied()), + db_item.or(post_state_item) } } } @@ -252,97 +282,24 @@ where type Value = U256; /// Seek the next account storage entry for a given hashed key pair. - fn seek( - &mut self, - subkey: B256, - ) -> Result, reth_db::DatabaseError> { - // Attempt to find the account's storage in post state. - let mut post_state_entry = None; - if let Some(storage) = self.post_state_storage { - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); - - while post_state_entry.map(|(slot, _)| slot < &subkey).unwrap_or_default() { - self.post_state_storage_index += 1; - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); - } - } - - // It's an exact match, return the storage slot from post state without looking up in - // the database. - if let Some((slot, value)) = post_state_entry { - if slot == &subkey { - self.last_slot = Some(*slot); - return Ok(Some((*slot, *value))) - } - } - - // It's not an exact match, reposition to the first greater or equal account. 
- let db_entry = if self.is_db_storage_wiped() { - None - } else { - let mut db_entry = self.cursor.seek(subkey)?; - - while db_entry - .as_ref() - .map(|entry| self.is_slot_zero_valued(&entry.0)) - .unwrap_or_default() - { - db_entry = self.cursor.next()?; - } - - db_entry - }; - - // Compare two entries and return the lowest. - let result = Self::next_slot(post_state_entry, db_entry); - self.last_slot = result.as_ref().map(|entry| entry.0); - Ok(result) + fn seek(&mut self, subkey: B256) -> Result, DatabaseError> { + let entry = self.seek_inner(subkey)?; + self.last_slot = entry.as_ref().map(|entry| entry.0); + Ok(entry) } /// Return the next account storage entry for the current account key. - /// - /// # Panics - /// - /// If the account key is not set. [`HashedCursor::seek`] must be called first in order to - /// position the cursor. - fn next(&mut self) -> Result, reth_db::DatabaseError> { - let last_slot = match self.last_slot.as_ref() { - Some(slot) => slot, - None => return Ok(None), // no previous entry was found - }; - - let db_entry = if self.is_db_storage_wiped() { - None - } else { - // If post state was given precedence, move the cursor forward. - let mut db_entry = self.cursor.seek(*last_slot)?; - - // If the entry was already returned or is zero-values, move to the next. - while db_entry - .as_ref() - .map(|entry| &entry.0 == last_slot || self.is_slot_zero_valued(&entry.0)) - .unwrap_or_default() - { - db_entry = self.cursor.next()?; + fn next(&mut self) -> Result, DatabaseError> { + let next = match self.last_slot { + Some(last_slot) => { + let entry = self.next_inner(last_slot)?; + self.last_slot = entry.as_ref().map(|entry| entry.0); + entry } - - db_entry + // no previous entry was found + None => None, }; - - // Attempt to find the account's storage in post state. 
- let mut post_state_entry = None; - if let Some(storage) = self.post_state_storage { - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); - while post_state_entry.map(|(slot, _)| slot <= last_slot).unwrap_or_default() { - self.post_state_storage_index += 1; - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); - } - } - - // Compare two entries and return the lowest. - let result = Self::next_slot(post_state_entry, db_entry); - self.last_slot = result.as_ref().map(|entry| entry.0); - Ok(result) + Ok(next) } } @@ -354,13 +311,13 @@ where /// /// This function should be called before attempting to call [`HashedCursor::seek`] or /// [`HashedCursor::next`]. - fn is_storage_empty(&mut self) -> Result { - let is_empty = match self.post_state_storage { - Some(storage) => { + fn is_storage_empty(&mut self) -> Result { + let is_empty = match &self.post_state_cursor { + Some(cursor) => { // If the storage has been wiped at any point - storage.wiped && + self.storage_wiped && // and the current storage does not contain any non-zero values - storage.non_zero_valued_slots.is_empty() + cursor.is_empty() } None => self.cursor.is_storage_empty()?, }; diff --git a/crates/trie/trie/src/lib.rs b/crates/trie/trie/src/lib.rs index eea65a7b34..07af077570 100644 --- a/crates/trie/trie/src/lib.rs +++ b/crates/trie/trie/src/lib.rs @@ -17,6 +17,9 @@ /// The container indicates when the trie has been modified. pub mod prefix_set; +/// The implementation of forward-only in-memory cursor. +pub mod forward_cursor; + /// The cursor implementations for navigating account and storage tries. pub mod trie_cursor;