refactor(db): use hashed state as canonical state representation (#21115)

Co-authored-by: Amp <amp@ampcode.com>
Co-authored-by: Dan Cline <6798349+Rjected@users.noreply.github.com>
Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
This commit is contained in:
Georgios Konstantopoulos
2026-02-12 13:02:02 -05:00
committed by GitHub
parent 7ff78ca082
commit 121160d248
60 changed files with 4341 additions and 484 deletions

View File

@@ -5,6 +5,7 @@ use reth_codecs::Compact;
use reth_db_api::{cursor::DbDupCursorRO, database::Database, tables, transaction::DbTx};
use reth_db_common::DbTool;
use reth_node_builder::NodeTypesWithDB;
use reth_storage_api::StorageSettingsCache;
use std::time::{Duration, Instant};
use tracing::info;
@@ -22,52 +23,94 @@ impl Command {
/// Execute `db account-storage` command
pub fn execute<N: NodeTypesWithDB>(self, tool: &DbTool<N>) -> eyre::Result<()> {
let address = self.address;
let (slot_count, plain_size) = tool.provider_factory.db_ref().view(|tx| {
let mut cursor = tx.cursor_dup_read::<tables::PlainStorageState>()?;
let mut count = 0usize;
let mut total_value_bytes = 0usize;
let mut last_log = Instant::now();
let use_hashed_state = tool.provider_factory.cached_storage_settings().use_hashed_state;
// Walk all storage entries for this address
let walker = cursor.walk_dup(Some(address), None)?;
for entry in walker {
let (_, storage_entry) = entry?;
count += 1;
// StorageEntry encodes as: 32 bytes (key/subkey uncompressed) + compressed U256
let mut buf = Vec::new();
let entry_len = storage_entry.to_compact(&mut buf);
total_value_bytes += entry_len;
let (slot_count, storage_size) = if use_hashed_state {
let hashed_address = keccak256(address);
tool.provider_factory.db_ref().view(|tx| {
let mut cursor = tx.cursor_dup_read::<tables::HashedStorages>()?;
let mut count = 0usize;
let mut total_value_bytes = 0usize;
let mut last_log = Instant::now();
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots = count,
key = %storage_entry.key,
"Processing storage slots"
);
last_log = Instant::now();
let walker = cursor.walk_dup(Some(hashed_address), None)?;
for entry in walker {
let (_, storage_entry) = entry?;
count += 1;
let mut buf = Vec::new();
let entry_len = storage_entry.to_compact(&mut buf);
total_value_bytes += entry_len;
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots = count,
key = %storage_entry.key,
"Processing hashed storage slots"
);
last_log = Instant::now();
}
}
}
// Add 20 bytes for the Address key (stored once per account in dupsort)
let total_size = if count > 0 { 20 + total_value_bytes } else { 0 };
let total_size = if count > 0 { 32 + total_value_bytes } else { 0 };
Ok::<_, eyre::Report>((count, total_size))
})??;
Ok::<_, eyre::Report>((count, total_size))
})??
} else {
tool.provider_factory.db_ref().view(|tx| {
let mut cursor = tx.cursor_dup_read::<tables::PlainStorageState>()?;
let mut count = 0usize;
let mut total_value_bytes = 0usize;
let mut last_log = Instant::now();
// Estimate hashed storage size: 32-byte B256 key instead of 20-byte Address
let hashed_size_estimate = if slot_count > 0 { plain_size + 12 } else { 0 };
let total_estimate = plain_size + hashed_size_estimate;
// Walk all storage entries for this address
let walker = cursor.walk_dup(Some(address), None)?;
for entry in walker {
let (_, storage_entry) = entry?;
count += 1;
let mut buf = Vec::new();
// StorageEntry encodes as: 32 bytes (key/subkey uncompressed) + compressed U256
let entry_len = storage_entry.to_compact(&mut buf);
total_value_bytes += entry_len;
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots = count,
key = %storage_entry.key,
"Processing storage slots"
);
last_log = Instant::now();
}
}
// Add 20 bytes for the Address key (stored once per account in dupsort)
let total_size = if count > 0 { 20 + total_value_bytes } else { 0 };
Ok::<_, eyre::Report>((count, total_size))
})??
};
let hashed_address = keccak256(address);
println!("Account: {address}");
println!("Hashed address: {hashed_address}");
println!("Storage slots: {slot_count}");
println!("Plain storage size: {} (estimated)", human_bytes(plain_size as f64));
println!("Hashed storage size: {} (estimated)", human_bytes(hashed_size_estimate as f64));
println!("Total estimated size: {}", human_bytes(total_estimate as f64));
if use_hashed_state {
println!("Hashed storage size: {} (estimated)", human_bytes(storage_size as f64));
} else {
// Estimate hashed storage size: 32-byte B256 key instead of 20-byte Address
let hashed_size_estimate = if slot_count > 0 { storage_size + 12 } else { 0 };
let total_estimate = storage_size + hashed_size_estimate;
println!("Plain storage size: {} (estimated)", human_bytes(storage_size as f64));
println!(
"Hashed storage size: {} (estimated)",
human_bytes(hashed_size_estimate as f64)
);
println!("Total estimated size: {}", human_bytes(total_estimate as f64));
}
Ok(())
}

View File

@@ -98,7 +98,8 @@ impl Command {
)?;
if let Some(entry) = entry {
println!("{}", serde_json::to_string_pretty(&entry)?);
let se: reth_primitives_traits::StorageEntry = entry.into();
println!("{}", serde_json::to_string_pretty(&se)?);
} else {
error!(target: "reth::cli", "No content for the given table key.");
}
@@ -106,7 +107,14 @@ impl Command {
}
let changesets = provider.storage_changeset(key.block_number())?;
println!("{}", serde_json::to_string_pretty(&changesets)?);
let serializable: Vec<_> = changesets
.into_iter()
.map(|(addr, entry)| {
let se: reth_primitives_traits::StorageEntry = entry.into();
(addr, se)
})
.collect();
println!("{}", serde_json::to_string_pretty(&serializable)?);
return Ok(());
}

View File

@@ -1,4 +1,4 @@
use alloy_primitives::{Address, BlockNumber, B256, U256};
use alloy_primitives::{keccak256, Address, BlockNumber, B256, U256};
use clap::Parser;
use parking_lot::Mutex;
use reth_db_api::{
@@ -63,39 +63,65 @@ impl Command {
address: Address,
limit: usize,
) -> eyre::Result<()> {
let use_hashed_state = tool.provider_factory.cached_storage_settings().use_hashed_state;
let entries = tool.provider_factory.db_ref().view(|tx| {
// Get account info
let account = tx.get::<tables::PlainAccountState>(address)?;
// Get storage entries
let mut cursor = tx.cursor_dup_read::<tables::PlainStorageState>()?;
let mut entries = Vec::new();
let mut last_log = Instant::now();
let walker = cursor.walk_dup(Some(address), None)?;
for (idx, entry) in walker.enumerate() {
let (_, storage_entry) = entry?;
if storage_entry.value != U256::ZERO {
entries.push((storage_entry.key, storage_entry.value));
let (account, walker_entries) = if use_hashed_state {
let hashed_address = keccak256(address);
let account = tx.get::<tables::HashedAccounts>(hashed_address)?;
let mut cursor = tx.cursor_dup_read::<tables::HashedStorages>()?;
let walker = cursor.walk_dup(Some(hashed_address), None)?;
let mut entries = Vec::new();
let mut last_log = Instant::now();
for (idx, entry) in walker.enumerate() {
let (_, storage_entry) = entry?;
if storage_entry.value != U256::ZERO {
entries.push((storage_entry.key, storage_entry.value));
}
if entries.len() >= limit {
break;
}
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots_scanned = idx,
"Scanning storage slots"
);
last_log = Instant::now();
}
}
if entries.len() >= limit {
break;
(account, entries)
} else {
// Get account info
let account = tx.get::<tables::PlainAccountState>(address)?;
// Get storage entries
let mut cursor = tx.cursor_dup_read::<tables::PlainStorageState>()?;
let walker = cursor.walk_dup(Some(address), None)?;
let mut entries = Vec::new();
let mut last_log = Instant::now();
for (idx, entry) in walker.enumerate() {
let (_, storage_entry) = entry?;
if storage_entry.value != U256::ZERO {
entries.push((storage_entry.key, storage_entry.value));
}
if entries.len() >= limit {
break;
}
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots_scanned = idx,
"Scanning storage slots"
);
last_log = Instant::now();
}
}
(account, entries)
};
if last_log.elapsed() >= LOG_INTERVAL {
info!(
target: "reth::cli",
address = %address,
slots_scanned = idx,
"Scanning storage slots"
);
last_log = Instant::now();
}
}
Ok::<_, eyre::Report>((account, entries))
Ok::<_, eyre::Report>((account, walker_entries))
})??;
let (account, storage_entries) = entries;

View File

@@ -119,8 +119,8 @@ pub struct NodeCommand<C: ChainSpecParser, Ext: clap::Args + fmt::Debug = NoArgs
#[command(flatten, next_help_heading = "Static Files")]
pub static_files: StaticFilesArgs,
/// Storage mode configuration (v2 vs v1/legacy)
#[command(flatten)]
/// All storage related arguments with --storage prefix
#[command(flatten, next_help_heading = "Storage")]
pub storage: StorageArgs,
/// Additional cli arguments

View File

@@ -131,8 +131,15 @@ impl<C: ChainSpecParser> Command<C> {
reset_stage_checkpoint(tx, StageId::SenderRecovery)?;
}
StageEnum::Execution => {
tx.clear::<tables::PlainAccountState>()?;
tx.clear::<tables::PlainStorageState>()?;
if provider_rw.cached_storage_settings().use_hashed_state {
tx.clear::<tables::HashedAccounts>()?;
tx.clear::<tables::HashedStorages>()?;
reset_stage_checkpoint(tx, StageId::AccountHashing)?;
reset_stage_checkpoint(tx, StageId::StorageHashing)?;
} else {
tx.clear::<tables::PlainAccountState>()?;
tx.clear::<tables::PlainStorageState>()?;
}
tx.clear::<tables::AccountChangeSets>()?;
tx.clear::<tables::StorageChangeSets>()?;
tx.clear::<tables::Bytecodes>()?;