feat(cli): in-memory merkle debug script (#3895)

Roman Krasiuk
2023-07-28 14:03:07 +03:00
committed by GitHub
parent 0892833842
commit 703d5c705a
7 changed files with 301 additions and 3 deletions

Cargo.lock generated

@@ -4969,6 +4969,7 @@ dependencies = [
"reth-beacon-consensus",
"reth-blockchain-tree",
"reth-config",
"reth-consensus-common",
"reth-db",
"reth-discv4",
"reth-downloaders",
@@ -4991,6 +4992,7 @@ dependencies = [
"reth-tasks",
"reth-tracing",
"reth-transaction-pool",
"reth-trie",
"secp256k1",
"serde",
"serde_json",


@@ -21,6 +21,7 @@ reth-interfaces = { workspace = true, features = ["test-utils", "clap"] }
reth-transaction-pool.workspace = true
reth-beacon-consensus = { path = "../../crates/consensus/beacon" }
reth-auto-seal-consensus = { path = "../../crates/consensus/auto-seal" }
reth-consensus-common = { path = "../../crates/consensus/common" }
reth-blockchain-tree = { path = "../../crates/blockchain-tree" }
reth-rpc-engine-api = { path = "../../crates/rpc/rpc-engine-api" }
reth-rpc-builder = { path = "../../crates/rpc/rpc-builder" }
@@ -37,6 +38,7 @@ reth-basic-payload-builder = { path = "../../crates/payload/basic" }
reth-discv4 = { path = "../../crates/net/discv4" }
reth-metrics.workspace = true
reth-prune = { path = "../../crates/prune" }
reth-trie = { path = "../../crates/trie" }
# crypto
secp256k1 = { workspace = true, features = ["global-context", "rand-std", "recovery"] }


@@ -42,7 +42,7 @@ use std::{
use tokio::sync::watch;
use tracing::*;
-/// `reth execution-debug` command
+/// `reth debug execution` command
#[derive(Debug, Parser)]
pub struct Command {
/// The path to the data dir for all reth files and subdirectories.


@@ -0,0 +1,257 @@
//! Command for debugging in-memory merkle trie calculation.
use crate::{
args::{get_secret_key, utils::genesis_value_parser, DatabaseArgs, NetworkArgs},
dirs::{DataDirPath, MaybePlatformPath},
runner::CliContext,
utils::{get_single_body, get_single_header},
};
use backon::{ConstantBuilder, Retryable};
use clap::Parser;
use reth_config::Config;
use reth_db::{init_db, DatabaseEnv};
use reth_discv4::DEFAULT_DISCOVERY_PORT;
use reth_network::NetworkHandle;
use reth_network_api::NetworkInfo;
use reth_primitives::{fs, stage::StageId, BlockHashOrNumber, ChainSpec};
use reth_provider::{
AccountExtReader, BlockExecutor, BlockWriter, ExecutorFactory, HashingWriter, HeaderProvider,
LatestStateProviderRef, ProviderFactory, StageCheckpointReader, StorageReader,
};
use reth_tasks::TaskExecutor;
use reth_trie::{hashed_cursor::HashedPostStateCursorFactory, updates::TrieKey, StateRoot};
use std::{
net::{Ipv4Addr, SocketAddr, SocketAddrV4},
path::PathBuf,
sync::Arc,
};
use tracing::*;
/// `reth debug in-memory-merkle` command
/// This debug routine requires that the node is positioned at the block before the target.
/// The script will then download the block from the p2p network and attempt to calculate and
/// verify the merkle root for it.
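/// Example invocation (flags as declared below): `reth debug in-memory-merkle --chain mainnet --retries 5`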
#[derive(Debug, Parser)]
pub struct Command {
/// The path to the data dir for all reth files and subdirectories.
///
/// Defaults to the OS-specific data directory:
///
/// - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/`
/// - Windows: `{FOLDERID_RoamingAppData}/reth/`
/// - macOS: `$HOME/Library/Application Support/reth/`
#[arg(long, value_name = "DATA_DIR", verbatim_doc_comment, default_value_t)]
datadir: MaybePlatformPath<DataDirPath>,
/// The chain this node is running.
///
/// Possible values are either a built-in chain or the path to a chain specification file.
///
/// Built-in chains:
/// - mainnet
/// - goerli
/// - sepolia
#[arg(
long,
value_name = "CHAIN_OR_PATH",
verbatim_doc_comment,
default_value = "mainnet",
value_parser = genesis_value_parser
)]
chain: Arc<ChainSpec>,
#[clap(flatten)]
db: DatabaseArgs,
#[clap(flatten)]
network: NetworkArgs,
/// The number of retries per request
#[arg(long, default_value = "5")]
retries: usize,
/// The depth after which we should start comparing branch nodes
#[arg(long)]
skip_node_depth: Option<usize>,
}
impl Command {
async fn build_network(
&self,
config: &Config,
task_executor: TaskExecutor,
db: Arc<DatabaseEnv>,
network_secret_path: PathBuf,
default_peers_path: PathBuf,
) -> eyre::Result<NetworkHandle> {
let secret_key = get_secret_key(&network_secret_path)?;
let network = self
.network
.network_config(config, self.chain.clone(), secret_key, default_peers_path)
.with_task_executor(Box::new(task_executor))
.listener_addr(SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::UNSPECIFIED,
self.network.port.unwrap_or(DEFAULT_DISCOVERY_PORT),
)))
.discovery_addr(SocketAddr::V4(SocketAddrV4::new(
Ipv4Addr::UNSPECIFIED,
self.network.discovery.port.unwrap_or(DEFAULT_DISCOVERY_PORT),
)))
.build(ProviderFactory::new(db, self.chain.clone()))
.start_network()
.await?;
info!(target: "reth::cli", peer_id = %network.peer_id(), local_addr = %network.local_addr(), "Connected to P2P network");
debug!(target: "reth::cli", peer_id = ?network.peer_id(), "Full peer ID");
Ok(network)
}
/// Execute `debug in-memory-merkle` command
pub async fn execute(self, ctx: CliContext) -> eyre::Result<()> {
let config = Config::default();
// add network name to data dir
let data_dir = self.datadir.unwrap_or_chain_default(self.chain.chain);
let db_path = data_dir.db_path();
fs::create_dir_all(&db_path)?;
// initialize the database
let db = Arc::new(init_db(db_path, self.db.log_level)?);
let factory = ProviderFactory::new(&db, self.chain.clone());
let provider = factory.provider()?;
// Look up merkle checkpoint
let merkle_checkpoint = provider
.get_stage_checkpoint(StageId::MerkleExecute)?
.expect("merkle checkpoint exists");
let merkle_block_number = merkle_checkpoint.block_number;
// Configure and build network
let network_secret_path =
self.network.p2p_secret_key.clone().unwrap_or_else(|| data_dir.p2p_secret_path());
let network = self
.build_network(
&config,
ctx.task_executor.clone(),
db.clone(),
network_secret_path,
data_dir.known_peers_path(),
)
.await?;
let target_block_number = merkle_block_number + 1;
info!(target: "reth::cli", target_block_number, "Downloading full block");
let fetch_client = network.fetch_client().await?;
let retries = self.retries.max(1);
let backoff = ConstantBuilder::default().with_max_times(retries);
let client = fetch_client.clone();
let header = (move || {
get_single_header(client.clone(), BlockHashOrNumber::Number(target_block_number))
})
.retry(&backoff)
.notify(|err, _| warn!(target: "reth::cli", "Error requesting header: {err}. Retrying..."))
.await?;
let client = fetch_client.clone();
let chain = Arc::clone(&self.chain);
let block = (move || get_single_body(client.clone(), Arc::clone(&chain), header.clone()))
.retry(&backoff)
.notify(
|err, _| warn!(target: "reth::cli", "Error requesting body: {err}. Retrying..."),
)
.await?;
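// Execute the downloaded block on top of the latest database state; the resulting post-state
// stays in memory and nothing is persisted yet.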
let executor_factory = reth_revm::Factory::new(self.chain.clone());
let mut executor = executor_factory.with_sp(LatestStateProviderRef::new(provider.tx_ref()));
let merkle_block_td =
provider.header_td_by_number(merkle_block_number)?.unwrap_or_default();
let block_state = executor.execute_and_verify_receipt(
&block.clone().unseal(),
merkle_block_td + block.difficulty,
None,
)?;
// Unpacked `PostState::state_root_slow` function
let hashed_post_state = block_state.hash_state_slow().sorted();
let (account_prefix_set, storage_prefix_set) = hashed_post_state.construct_prefix_sets();
let tx = provider.tx_ref();
let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, &hashed_post_state);
let (in_memory_state_root, in_memory_updates) = StateRoot::new(tx)
.with_hashed_cursor_factory(&hashed_cursor_factory)
.with_changed_account_prefixes(account_prefix_set)
.with_changed_storage_prefixes(storage_prefix_set)
.root_with_updates()?;
if in_memory_state_root == block.state_root {
info!(target: "reth::cli", state_root = ?in_memory_state_root, "Computed in-memory state root matches");
return Ok(())
}
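// The in-memory root did not match: persist the block, its state and hashes, then recompute
// the state root incrementally from the database to pin down which trie updates diverge.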
let provider_rw = factory.provider_rw()?;
// Insert block, state and hashes
provider_rw.insert_block(block.clone(), None)?;
block_state.write_to_db(provider_rw.tx_ref(), block.number)?;
let storage_lists = provider_rw.changed_storages_with_range(block.number..=block.number)?;
let storages = provider_rw.plainstate_storages(storage_lists)?;
provider_rw.insert_storage_for_hashing(storages)?;
let account_lists = provider_rw.changed_accounts_with_range(block.number..=block.number)?;
let accounts = provider_rw.basic_accounts(account_lists)?;
provider_rw.insert_account_for_hashing(accounts)?;
let (state_root, incremental_trie_updates) = StateRoot::incremental_root_with_updates(
provider_rw.tx_ref(),
block.number..=block.number,
)?;
if state_root != block.state_root {
eyre::bail!(
"Computed incremental state root mismatch. Expected: {:?}. Got: {:?}",
block.state_root,
state_root
);
}
// Compare updates
let mut in_mem_mismatched = Vec::new();
let mut incremental_mismatched = Vec::new();
let mut in_mem_updates_iter = in_memory_updates.into_iter().peekable();
let mut incremental_updates_iter = incremental_trie_updates.into_iter().peekable();
while in_mem_updates_iter.peek().is_some() || incremental_updates_iter.peek().is_some() {
match (in_mem_updates_iter.next(), incremental_updates_iter.next()) {
(Some(in_mem), Some(incr)) => {
pretty_assertions::assert_eq!(in_mem.0, incr.0, "Nibbles don't match");
if in_mem.1 != incr.1 &&
matches!(in_mem.0, TrieKey::AccountNode(ref nibbles) if nibbles.inner.len() > self.skip_node_depth.unwrap_or_default())
{
in_mem_mismatched.push(in_mem);
incremental_mismatched.push(incr);
}
}
(Some(in_mem), None) => {
warn!(target: "reth::cli", next = ?in_mem, "In-memory trie updates have more entries");
}
(None, Some(incr)) => {
tracing::warn!(target: "reth::cli", next = ?incr, "Incremental trie updates have more entries");
}
(None, None) => {
tracing::info!(target: "reth::cli", "Exhausted all trie updates entries");
}
}
}
pretty_assertions::assert_eq!(
incremental_mismatched,
in_mem_mismatched,
"Mismatched trie updates"
);
// Drop without committing.
drop(provider_rw);
Ok(())
}
}
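
The comparison loop above walks the two sorted update sets in lockstep and records entries whose operations differ. A minimal self-contained sketch of the same idea, with the update sets modeled as plain `BTreeMap`s and the key/operation types as generic stand-ins for `TrieKey`/`TrieOp`:

use std::collections::BTreeMap;

/// Collect entries present in both update sets whose operations differ.
fn mismatched_updates<K: Ord + Clone, V: PartialEq + Clone>(
    in_memory: &BTreeMap<K, V>,
    incremental: &BTreeMap<K, V>,
) -> Vec<(K, V, V)> {
    let mut mismatched = Vec::new();
    // BTreeMap iteration is key-ordered, mirroring the sorted iterators used above.
    for (key, in_mem_op) in in_memory {
        match incremental.get(key) {
            // Same key, different operation: record the divergence.
            Some(incr_op) if incr_op != in_mem_op => {
                mismatched.push((key.clone(), in_mem_op.clone(), incr_op.clone()));
            }
            // Matching entry, or key only present on the in-memory side.
            _ => {}
        }
    }
    mismatched
}

The real command additionally logs entries that appear on only one side and only records mismatches for account nodes deeper than the configured `--skip-node-depth`.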


@@ -4,6 +4,7 @@ use clap::{Parser, Subcommand};
use crate::runner::CliContext;
mod execution;
mod in_memory_merkle;
mod merkle;
/// `reth debug` command
@@ -20,6 +21,8 @@ pub enum Subcommands {
Execution(execution::Command),
/// Debug the clean & incremental state root calculations.
Merkle(merkle::Command),
/// Debug in-memory state root calculation.
InMemoryMerkle(in_memory_merkle::Command),
}
impl Command {
@@ -28,6 +31,7 @@ impl Command {
match self.command {
Subcommands::Execution(command) => command.execute(ctx).await,
Subcommands::Merkle(command) => command.execute().await,
Subcommands::InMemoryMerkle(command) => command.execute(ctx).await,
}
}
}


@@ -1,6 +1,7 @@
//! Common CLI utility functions.
use eyre::Result;
use reth_consensus_common::validation::validate_block_standalone;
use reth_db::{
cursor::DbCursorRO,
database::Database,
@@ -8,10 +9,13 @@ use reth_db::{
transaction::{DbTx, DbTxMut},
};
use reth_interfaces::p2p::{
bodies::client::BodiesClient,
headers::client::{HeadersClient, HeadersRequest},
priority::Priority,
};
-use reth_primitives::{fs, BlockHashOrNumber, ChainSpec, HeadersDirection, SealedHeader};
+use reth_primitives::{
+    fs, BlockHashOrNumber, ChainSpec, HeadersDirection, SealedBlock, SealedHeader,
+};
use std::{
env::VarError,
path::{Path, PathBuf},
@@ -56,6 +60,35 @@ where
Ok(header)
}
/// Get a body from the network based on the given header
pub async fn get_single_body<Client>(
client: Client,
chain_spec: Arc<ChainSpec>,
header: SealedHeader,
) -> Result<SealedBlock>
where
Client: BodiesClient,
{
let (peer_id, response) = client.get_block_body(header.hash).await?.split();
if response.is_none() {
client.report_bad_message(peer_id);
eyre::bail!("Invalid number of bodies received. Expected: 1. Received: 0")
}
let block = response.unwrap();
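// Reassemble a sealed block from the already-sealed header and the downloaded body, then
// validate it standalone against the chain spec.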
let block = SealedBlock {
header,
body: block.transactions,
ommers: block.ommers,
withdrawals: block.withdrawals,
};
validate_block_standalone(&block, &chain_spec)?;
Ok(block)
}
/// Wrapper over DB that implements many useful DB queries.
pub struct DbTool<'a, DB: Database> {
pub(crate) db: &'a DB,


@@ -22,7 +22,7 @@ pub enum TrieKey {
}
/// The operation to perform on the trie.
-#[derive(Debug, Clone)]
+#[derive(PartialEq, Eq, Debug, Clone)]
pub enum TrieOp {
/// Delete the node entry.
Delete,