mirror of
https://github.com/paradigmxyz/reth.git
synced 2026-04-30 03:01:58 -04:00
feat(stages): add table checkpoint to AccountHashing and StorageHashing (#1667)
Co-authored-by: Georgios Konstantopoulos <me@gakonst.com>
This commit is contained in:
42
crates/primitives/src/checkpoints.rs
Normal file
42
crates/primitives/src/checkpoints.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use crate::{Address, H256};
|
||||
use reth_codecs::{main_codec, Compact};
|
||||
|
||||
/// Saves the progress of MerkleStage
|
||||
#[main_codec]
|
||||
#[derive(Default, Debug, Copy, Clone, PartialEq)]
|
||||
pub struct ProofCheckpoint {
|
||||
/// The next hashed account to insert into the trie.
|
||||
pub hashed_address: Option<H256>,
|
||||
/// The next storage entry to insert into the trie.
|
||||
pub storage_key: Option<H256>,
|
||||
/// Current intermediate root for `AccountsTrie`.
|
||||
pub account_root: Option<H256>,
|
||||
/// Current intermediate storage root from an account.
|
||||
pub storage_root: Option<H256>,
|
||||
}
|
||||
|
||||
/// Saves the progress of AccountHashing
|
||||
#[main_codec]
|
||||
#[derive(Default, Debug, Copy, Clone, PartialEq)]
|
||||
pub struct AccountHashingCheckpoint {
|
||||
/// The next account to start hashing from
|
||||
pub address: Option<Address>,
|
||||
/// Start transition id
|
||||
pub from: u64,
|
||||
/// Last transition id
|
||||
pub to: u64,
|
||||
}
|
||||
|
||||
/// Saves the progress of StorageHashing
|
||||
#[main_codec]
|
||||
#[derive(Default, Debug, Copy, Clone, PartialEq)]
|
||||
pub struct StorageHashingCheckpoint {
|
||||
/// The next account to start hashing from
|
||||
pub address: Option<Address>,
|
||||
/// The next storage slot to start hashing from
|
||||
pub storage: Option<H256>,
|
||||
/// Start transition id
|
||||
pub from: u64,
|
||||
/// Last transition id
|
||||
pub to: u64,
|
||||
}
|
||||
@@ -14,6 +14,7 @@ mod bits;
|
||||
mod block;
|
||||
pub mod bloom;
|
||||
mod chain;
|
||||
mod checkpoints;
|
||||
pub mod constants;
|
||||
pub mod contract;
|
||||
mod error;
|
||||
@@ -34,7 +35,6 @@ mod withdrawal;
|
||||
|
||||
/// Helper function for calculating Merkle proofs and hashes
|
||||
pub mod proofs;
|
||||
pub use proofs::ProofCheckpoint;
|
||||
|
||||
pub use account::{Account, Bytecode};
|
||||
pub use bits::H512;
|
||||
@@ -46,6 +46,7 @@ pub use chain::{
|
||||
AllGenesisFormats, Chain, ChainInfo, ChainSpec, ChainSpecBuilder, ForkCondition, GOERLI,
|
||||
MAINNET, SEPOLIA,
|
||||
};
|
||||
pub use checkpoints::{AccountHashingCheckpoint, ProofCheckpoint, StorageHashingCheckpoint};
|
||||
pub use constants::{
|
||||
EMPTY_OMMER_ROOT, GOERLI_GENESIS, KECCAK_EMPTY, MAINNET_GENESIS, SEPOLIA_GENESIS,
|
||||
};
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
keccak256, Address, Bytes, GenesisAccount, Header, Log, Receipt, TransactionSigned, Withdrawal,
|
||||
H256,
|
||||
@@ -8,8 +6,8 @@ use bytes::BytesMut;
|
||||
use hash_db::Hasher;
|
||||
use hex_literal::hex;
|
||||
use plain_hasher::PlainHasher;
|
||||
use reth_codecs::{main_codec, Compact};
|
||||
use reth_rlp::Encodable;
|
||||
use std::collections::HashMap;
|
||||
use triehash::{ordered_trie_root, sec_trie_root};
|
||||
|
||||
/// Keccak-256 hash of the RLP of an empty list, KEC("\xc0").
|
||||
@@ -35,23 +33,8 @@ impl Hasher for KeccakHasher {
|
||||
}
|
||||
}
|
||||
|
||||
/// Saves the progress of MerkleStage
|
||||
#[main_codec]
|
||||
#[derive(Default, Debug, Copy, Clone, PartialEq)]
|
||||
pub struct ProofCheckpoint {
|
||||
/// The next hashed account to insert into the trie.
|
||||
pub hashed_address: Option<H256>,
|
||||
/// The next storage entry to insert into the trie.
|
||||
pub storage_key: Option<H256>,
|
||||
/// Current intermediate root for `AccountsTrie`.
|
||||
pub account_root: Option<H256>,
|
||||
/// Current intermediate storage root from an account.
|
||||
pub storage_root: Option<H256>,
|
||||
}
|
||||
|
||||
/// Calculate a transaction root.
|
||||
///
|
||||
/// Iterates over the given transactions and computes the merkle trie root of
|
||||
/// `(rlp(index), encoded(tx))` pairs.
|
||||
pub fn calculate_transaction_root<'a>(
|
||||
transactions: impl IntoIterator<Item = &'a TransactionSigned>,
|
||||
|
||||
@@ -18,6 +18,7 @@ normal = [
|
||||
reth-primitives = { path = "../primitives" }
|
||||
reth-interfaces = { path = "../interfaces" }
|
||||
reth-db = { path = "../storage/db" }
|
||||
reth-codecs = { path = "../storage/codecs" }
|
||||
reth-provider = { path = "../storage/provider" }
|
||||
reth-metrics-derive = { path = "../metrics/metrics-derive" }
|
||||
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
use crate::{ExecInput, ExecOutput, Stage, StageError, StageId, UnwindInput, UnwindOutput};
|
||||
use reth_codecs::Compact;
|
||||
use reth_db::{
|
||||
cursor::{DbCursorRO, DbCursorRW},
|
||||
database::Database,
|
||||
tables,
|
||||
transaction::{DbTx, DbTxMut},
|
||||
};
|
||||
use reth_primitives::keccak256;
|
||||
use reth_primitives::{keccak256, AccountHashingCheckpoint};
|
||||
use reth_provider::Transaction;
|
||||
use std::{collections::BTreeMap, fmt::Debug, ops::Range};
|
||||
use tracing::*;
|
||||
@@ -30,6 +31,43 @@ impl Default for AccountHashingStage {
|
||||
}
|
||||
}
|
||||
|
||||
impl AccountHashingStage {
|
||||
/// Saves the hashing progress
|
||||
pub fn save_checkpoint<DB: Database>(
|
||||
&mut self,
|
||||
tx: &Transaction<'_, DB>,
|
||||
checkpoint: AccountHashingCheckpoint,
|
||||
) -> Result<(), StageError> {
|
||||
debug!(target: "sync::stages::account_hashing::exec", checkpoint = ?checkpoint, "Saving inner account hashing checkpoint");
|
||||
|
||||
let mut buf = vec![];
|
||||
checkpoint.to_compact(&mut buf);
|
||||
|
||||
Ok(tx.put::<tables::SyncStageProgress>(ACCOUNT_HASHING.0.into(), buf)?)
|
||||
}
|
||||
|
||||
/// Gets the hashing progress
|
||||
pub fn get_checkpoint<DB: Database>(
|
||||
&self,
|
||||
tx: &Transaction<'_, DB>,
|
||||
) -> Result<AccountHashingCheckpoint, StageError> {
|
||||
let buf =
|
||||
tx.get::<tables::SyncStageProgress>(ACCOUNT_HASHING.0.into())?.unwrap_or_default();
|
||||
|
||||
if buf.is_empty() {
|
||||
return Ok(AccountHashingCheckpoint::default())
|
||||
}
|
||||
|
||||
let (checkpoint, _) = AccountHashingCheckpoint::from_compact(&buf, buf.len());
|
||||
|
||||
if checkpoint.address.is_some() {
|
||||
debug!(target: "sync::stages::account_hashing::exec", checkpoint = ?checkpoint, "Continuing inner account hashing checkpoint");
|
||||
}
|
||||
|
||||
Ok(checkpoint)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
/// `SeedOpts` provides configuration parameters for calling `AccountHashingStage::seed`
|
||||
/// in unit tests or benchmarks to generate an initial database state for running the
|
||||
@@ -137,43 +175,58 @@ impl<DB: Database> Stage<DB> for AccountHashingStage {
|
||||
// AccountHashing table. Also, if we start from genesis, we need to hash from scratch, as
|
||||
// genesis accounts are not in changeset.
|
||||
if to_transition - from_transition > self.clean_threshold || stage_progress == 0 {
|
||||
// clear table, load all accounts and hash it
|
||||
tx.clear::<tables::HashedAccount>()?;
|
||||
tx.commit()?;
|
||||
let mut checkpoint = self.get_checkpoint(tx)?;
|
||||
|
||||
let mut first_key = None;
|
||||
loop {
|
||||
let next_key = {
|
||||
let mut accounts = tx.cursor_read::<tables::PlainAccountState>()?;
|
||||
if checkpoint.address.is_none() ||
|
||||
// Checkpoint is no longer valid if the range of transitions changed.
|
||||
// An already hashed account may have been changed with the new range, and therefore should be hashed again.
|
||||
checkpoint.to != to_transition ||
|
||||
checkpoint.from != from_transition
|
||||
{
|
||||
// clear table, load all accounts and hash it
|
||||
tx.clear::<tables::HashedAccount>()?;
|
||||
|
||||
let hashed_batch = accounts
|
||||
.walk(first_key)?
|
||||
.take(self.commit_threshold as usize)
|
||||
.map(|res| res.map(|(address, account)| (keccak256(address), account)))
|
||||
.collect::<Result<BTreeMap<_, _>, _>>()?;
|
||||
checkpoint = AccountHashingCheckpoint::default();
|
||||
self.save_checkpoint(tx, checkpoint)?;
|
||||
}
|
||||
|
||||
let mut hashed_account_cursor = tx.cursor_write::<tables::HashedAccount>()?;
|
||||
let start_address = checkpoint.address.take();
|
||||
let next_address = {
|
||||
let mut accounts = tx.cursor_read::<tables::PlainAccountState>()?;
|
||||
|
||||
// iterate and put presorted hashed accounts
|
||||
if first_key.is_none() {
|
||||
hashed_batch
|
||||
.into_iter()
|
||||
.try_for_each(|(k, v)| hashed_account_cursor.append(k, v))?;
|
||||
} else {
|
||||
hashed_batch
|
||||
.into_iter()
|
||||
.try_for_each(|(k, v)| hashed_account_cursor.insert(k, v))?;
|
||||
}
|
||||
let hashed_batch = accounts
|
||||
.walk(start_address)?
|
||||
.take(self.commit_threshold as usize)
|
||||
.map(|res| res.map(|(address, account)| (keccak256(address), account)))
|
||||
.collect::<Result<BTreeMap<_, _>, _>>()?;
|
||||
|
||||
// next key of iterator
|
||||
accounts.next()?
|
||||
};
|
||||
tx.commit()?;
|
||||
if let Some((next_key, _)) = next_key {
|
||||
first_key = Some(next_key);
|
||||
continue
|
||||
let mut hashed_account_cursor = tx.cursor_write::<tables::HashedAccount>()?;
|
||||
|
||||
// iterate and put presorted hashed accounts
|
||||
if start_address.is_none() {
|
||||
hashed_batch
|
||||
.into_iter()
|
||||
.try_for_each(|(k, v)| hashed_account_cursor.append(k, v))?;
|
||||
} else {
|
||||
hashed_batch
|
||||
.into_iter()
|
||||
.try_for_each(|(k, v)| hashed_account_cursor.insert(k, v))?;
|
||||
}
|
||||
break
|
||||
|
||||
// next key of iterator
|
||||
accounts.next()?
|
||||
};
|
||||
|
||||
if let Some((next_address, _)) = &next_address {
|
||||
checkpoint.address = Some(*next_address);
|
||||
checkpoint.from = from_transition;
|
||||
checkpoint.to = to_transition;
|
||||
}
|
||||
|
||||
self.save_checkpoint(tx, checkpoint)?;
|
||||
|
||||
if next_address.is_some() {
|
||||
return Ok(ExecOutput { stage_progress, done: false })
|
||||
}
|
||||
} else {
|
||||
// Aggregate all transition changesets and make a list of accounts that have been
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::{ExecInput, ExecOutput, Stage, StageError, StageId, UnwindInput, UnwindOutput};
|
||||
use num_traits::Zero;
|
||||
use reth_codecs::Compact;
|
||||
use reth_db::{
|
||||
cursor::DbDupCursorRO,
|
||||
database::Database,
|
||||
@@ -7,7 +8,7 @@ use reth_db::{
|
||||
tables,
|
||||
transaction::{DbTx, DbTxMut},
|
||||
};
|
||||
use reth_primitives::{keccak256, Address, StorageEntry};
|
||||
use reth_primitives::{keccak256, Address, StorageEntry, StorageHashingCheckpoint};
|
||||
use reth_provider::Transaction;
|
||||
use std::{collections::BTreeMap, fmt::Debug};
|
||||
use tracing::*;
|
||||
@@ -32,6 +33,43 @@ impl Default for StorageHashingStage {
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageHashingStage {
|
||||
/// Saves the hashing progress
|
||||
pub fn save_checkpoint<DB: Database>(
|
||||
&mut self,
|
||||
tx: &Transaction<'_, DB>,
|
||||
checkpoint: StorageHashingCheckpoint,
|
||||
) -> Result<(), StageError> {
|
||||
debug!(target: "sync::stages::storage_hashing::exec", checkpoint = ?checkpoint, "Saving inner storage hashing checkpoint");
|
||||
|
||||
let mut buf = vec![];
|
||||
checkpoint.to_compact(&mut buf);
|
||||
|
||||
Ok(tx.put::<tables::SyncStageProgress>(STORAGE_HASHING.0.into(), buf)?)
|
||||
}
|
||||
|
||||
/// Gets the hashing progress
|
||||
pub fn get_checkpoint<DB: Database>(
|
||||
&self,
|
||||
tx: &Transaction<'_, DB>,
|
||||
) -> Result<StorageHashingCheckpoint, StageError> {
|
||||
let buf =
|
||||
tx.get::<tables::SyncStageProgress>(STORAGE_HASHING.0.into())?.unwrap_or_default();
|
||||
|
||||
if buf.is_empty() {
|
||||
return Ok(StorageHashingCheckpoint::default())
|
||||
}
|
||||
|
||||
let (checkpoint, _) = StorageHashingCheckpoint::from_compact(&buf, buf.len());
|
||||
|
||||
if checkpoint.address.is_some() {
|
||||
debug!(target: "sync::stages::storage_hashing::exec", checkpoint = ?checkpoint, "Continuing inner storage hashing checkpoint");
|
||||
}
|
||||
|
||||
Ok(checkpoint)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl<DB: Database> Stage<DB> for StorageHashingStage {
|
||||
/// Return the id of the stage
|
||||
@@ -57,77 +95,92 @@ impl<DB: Database> Stage<DB> for StorageHashingStage {
|
||||
// AccountHashing table. Also, if we start from genesis, we need to hash from scratch, as
|
||||
// genesis accounts are not in changeset, along with their storages.
|
||||
if to_transition - from_transition > self.clean_threshold || stage_progress == 0 {
|
||||
tx.clear::<tables::HashedStorage>()?;
|
||||
tx.commit()?;
|
||||
let mut checkpoint = self.get_checkpoint(tx)?;
|
||||
|
||||
let mut current_key = None;
|
||||
let mut current_subkey = None;
|
||||
if checkpoint.address.is_none() ||
|
||||
// Checkpoint is no longer valid if the range of transitions changed.
|
||||
// An already hashed storage may have been changed with the new range, and therefore should be hashed again.
|
||||
checkpoint.to != to_transition ||
|
||||
checkpoint.from != from_transition
|
||||
{
|
||||
tx.clear::<tables::HashedStorage>()?;
|
||||
|
||||
checkpoint = StorageHashingCheckpoint::default();
|
||||
self.save_checkpoint(tx, checkpoint)?;
|
||||
}
|
||||
|
||||
let mut current_key = checkpoint.address.take();
|
||||
let mut current_subkey = checkpoint.storage.take();
|
||||
let mut keccak_address = None;
|
||||
|
||||
loop {
|
||||
let mut hashed_batch = BTreeMap::new();
|
||||
let mut remaining = self.commit_threshold as usize;
|
||||
{
|
||||
let mut storage = tx.cursor_dup_read::<tables::PlainStorageState>()?;
|
||||
while !remaining.is_zero() {
|
||||
hashed_batch.extend(
|
||||
storage
|
||||
.walk_dup(current_key, current_subkey)?
|
||||
.take(remaining)
|
||||
.map(|res| {
|
||||
res.map(|(address, slot)| {
|
||||
// Address caching for the first iteration when current_key
|
||||
// is None
|
||||
let keccak_address =
|
||||
if let Some(keccak_address) = keccak_address {
|
||||
keccak_address
|
||||
} else {
|
||||
keccak256(address)
|
||||
};
|
||||
let mut hashed_batch = BTreeMap::new();
|
||||
let mut remaining = self.commit_threshold as usize;
|
||||
{
|
||||
let mut storage = tx.cursor_dup_read::<tables::PlainStorageState>()?;
|
||||
while !remaining.is_zero() {
|
||||
hashed_batch.extend(
|
||||
storage
|
||||
.walk_dup(current_key, current_subkey)?
|
||||
.take(remaining)
|
||||
.map(|res| {
|
||||
res.map(|(address, slot)| {
|
||||
// Address caching for the first iteration when current_key
|
||||
// is None
|
||||
let keccak_address =
|
||||
if let Some(keccak_address) = keccak_address {
|
||||
keccak_address
|
||||
} else {
|
||||
keccak256(address)
|
||||
};
|
||||
|
||||
// TODO cache map keccak256(slot.key) ?
|
||||
((keccak_address, keccak256(slot.key)), slot.value)
|
||||
})
|
||||
// TODO cache map keccak256(slot.key) ?
|
||||
((keccak_address, keccak256(slot.key)), slot.value)
|
||||
})
|
||||
.collect::<Result<BTreeMap<_, _>, _>>()?,
|
||||
);
|
||||
})
|
||||
.collect::<Result<BTreeMap<_, _>, _>>()?,
|
||||
);
|
||||
|
||||
remaining = self.commit_threshold as usize - hashed_batch.len();
|
||||
remaining = self.commit_threshold as usize - hashed_batch.len();
|
||||
|
||||
if let Some((address, slot)) = storage.next_dup()? {
|
||||
// There's still some remaining elements on this key, so we need to save
|
||||
// the cursor position for the next
|
||||
// iteration
|
||||
if let Some((address, slot)) = storage.next_dup()? {
|
||||
// There's still some remaining elements on this key, so we need to save
|
||||
// the cursor position for the next
|
||||
// iteration
|
||||
|
||||
current_key = Some(address);
|
||||
current_subkey = Some(slot.key);
|
||||
current_key = Some(address);
|
||||
current_subkey = Some(slot.key);
|
||||
} else {
|
||||
// Go to the next key
|
||||
current_key = storage.next_no_dup()?.map(|(key, _)| key);
|
||||
current_subkey = None;
|
||||
|
||||
// Cache keccak256(address) for the next key if it exists
|
||||
if let Some(address) = current_key {
|
||||
keccak_address = Some(keccak256(address));
|
||||
} else {
|
||||
// Go to the next key
|
||||
current_key = storage.next_no_dup()?.map(|(key, _)| key);
|
||||
current_subkey = None;
|
||||
|
||||
// Cache keccak256(address) for the next key if it exists
|
||||
if let Some(address) = current_key {
|
||||
keccak_address = Some(keccak256(address));
|
||||
} else {
|
||||
// We have reached the end of table
|
||||
break
|
||||
}
|
||||
// We have reached the end of table
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// iterate and put presorted hashed slots
|
||||
hashed_batch.into_iter().try_for_each(|((addr, key), value)| {
|
||||
tx.put::<tables::HashedStorage>(addr, StorageEntry { key, value })
|
||||
})?;
|
||||
// iterate and put presorted hashed slots
|
||||
hashed_batch.into_iter().try_for_each(|((addr, key), value)| {
|
||||
tx.put::<tables::HashedStorage>(addr, StorageEntry { key, value })
|
||||
})?;
|
||||
|
||||
tx.commit()?;
|
||||
if let Some(address) = &current_key {
|
||||
checkpoint.address = Some(*address);
|
||||
checkpoint.storage = current_subkey;
|
||||
checkpoint.from = from_transition;
|
||||
checkpoint.to = to_transition;
|
||||
}
|
||||
|
||||
// We have reached the end of table
|
||||
if current_key.is_none() {
|
||||
break
|
||||
}
|
||||
self.save_checkpoint(tx, checkpoint)?;
|
||||
|
||||
if current_key.is_some() {
|
||||
return Ok(ExecOutput { stage_progress, done: false })
|
||||
}
|
||||
} else {
|
||||
// Aggregate all transition changesets and make a list of storages that have been
|
||||
@@ -170,7 +223,6 @@ mod tests {
|
||||
stage_test_suite_ext, ExecuteStageTestRunner, StageTestRunner, TestRunnerError,
|
||||
TestTransaction, UnwindStageTestRunner, PREV_STAGE_ID,
|
||||
};
|
||||
use assert_matches::assert_matches;
|
||||
use reth_db::{
|
||||
cursor::{DbCursorRO, DbCursorRW},
|
||||
mdbx::{tx::Tx, WriteMap, RW},
|
||||
@@ -205,18 +257,25 @@ mod tests {
|
||||
|
||||
runner.seed_execution(input).expect("failed to seed execution");
|
||||
|
||||
let rx = runner.execute(input);
|
||||
loop {
|
||||
if let Ok(result) = runner.execute(input).await.unwrap() {
|
||||
if !result.done {
|
||||
// Continue from checkpoint
|
||||
continue
|
||||
} else {
|
||||
assert!(result.stage_progress == previous_stage);
|
||||
|
||||
// Assert the successful result
|
||||
let result = rx.await.unwrap();
|
||||
assert_matches!(
|
||||
result,
|
||||
Ok(ExecOutput { done, stage_progress })
|
||||
if done && stage_progress == previous_stage
|
||||
);
|
||||
// Validate the stage execution
|
||||
assert!(
|
||||
runner.validate_execution(input, Some(result)).is_ok(),
|
||||
"execution validation"
|
||||
);
|
||||
|
||||
// Validate the stage execution
|
||||
assert!(runner.validate_execution(input, result.ok()).is_ok(), "execution validation");
|
||||
break
|
||||
}
|
||||
}
|
||||
panic!("Failed execution");
|
||||
}
|
||||
}
|
||||
|
||||
struct StorageHashingTestRunner {
|
||||
|
||||
Reference in New Issue
Block a user