feat(cli): add reth db checksum mdbx/static-file command (#21211)

Co-authored-by: Amp <amp@ampcode.com>
Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
This commit is contained in:
Georgios Konstantopoulos
2026-01-20 08:11:51 -08:00
committed by GitHub
parent 39d5ae73e8
commit 4f009728e2
13 changed files with 920 additions and 55 deletions

View File

@@ -4,15 +4,17 @@ use crate::{
};
use alloy_primitives::map::foldhash::fast::FixedState;
use clap::Parser;
use itertools::Itertools;
use reth_chainspec::EthereumHardforks;
use reth_db::DatabaseEnv;
use reth_db::{static_file::iter_static_files, DatabaseEnv};
use reth_db_api::{
cursor::DbCursorRO, table::Table, transaction::DbTx, RawKey, RawTable, RawValue, TableViewer,
Tables,
};
use reth_db_common::DbTool;
use reth_node_builder::{NodeTypesWithDB, NodeTypesWithDBAdapter};
use reth_provider::{providers::ProviderNodeTypes, DBProvider};
use reth_provider::{providers::ProviderNodeTypes, DBProvider, StaticFileProviderFactory};
use reth_static_file_types::StaticFileSegment;
use std::{
hash::{BuildHasher, Hasher},
sync::Arc,
@@ -20,24 +22,54 @@ use std::{
};
use tracing::{info, warn};
/// Interval for logging progress during checksum computation.
const PROGRESS_LOG_INTERVAL: usize = 100_000;
#[derive(Parser, Debug)]
/// The arguments for the `reth db checksum` command
pub struct Command {
/// The table name
table: Tables,
#[command(subcommand)]
subcommand: Subcommand,
}
/// The start of the range to checksum.
#[arg(long, value_parser = maybe_json_value_parser)]
start_key: Option<String>,
#[derive(clap::Subcommand, Debug)]
enum Subcommand {
/// Calculates the checksum of a database table
Mdbx {
/// The table name
table: Tables,
/// The end of the range to checksum.
#[arg(long, value_parser = maybe_json_value_parser)]
end_key: Option<String>,
/// The start of the range to checksum.
#[arg(long, value_parser = maybe_json_value_parser)]
start_key: Option<String>,
/// The maximum number of records that are queried and used to compute the
/// checksum.
#[arg(long)]
limit: Option<usize>,
/// The end of the range to checksum.
#[arg(long, value_parser = maybe_json_value_parser)]
end_key: Option<String>,
/// The maximum number of records that are queried and used to compute the
/// checksum.
#[arg(long)]
limit: Option<usize>,
},
/// Calculates the checksum of a static file segment
StaticFile {
/// The static file segment
#[arg(value_enum)]
segment: StaticFileSegment,
/// The block number to start from (inclusive).
#[arg(long)]
start_block: Option<u64>,
/// The block number to end at (inclusive).
#[arg(long)]
end_block: Option<u64>,
/// The maximum number of rows to checksum.
#[arg(long)]
limit: Option<usize>,
},
}
impl Command {
@@ -47,16 +79,109 @@ impl Command {
tool: &DbTool<NodeTypesWithDBAdapter<N, Arc<DatabaseEnv>>>,
) -> eyre::Result<()> {
warn!("This command should be run without the node running!");
self.table.view(&ChecksumViewer {
tool,
start_key: self.start_key,
end_key: self.end_key,
limit: self.limit,
})?;
match self.subcommand {
Subcommand::Mdbx { table, start_key, end_key, limit } => {
table.view(&ChecksumViewer { tool, start_key, end_key, limit })?;
}
Subcommand::StaticFile { segment, start_block, end_block, limit } => {
checksum_static_file(tool, segment, start_block, end_block, limit)?;
}
}
Ok(())
}
}
/// Creates a new hasher with the standard seed used for checksum computation.
fn checksum_hasher() -> impl Hasher {
FixedState::with_seed(u64::from_be_bytes(*b"RETHRETH")).build_hasher()
}
fn checksum_static_file<N: CliNodeTypes<ChainSpec: EthereumHardforks>>(
tool: &DbTool<NodeTypesWithDBAdapter<N, Arc<DatabaseEnv>>>,
segment: StaticFileSegment,
start_block: Option<u64>,
end_block: Option<u64>,
limit: Option<usize>,
) -> eyre::Result<()> {
let static_file_provider = tool.provider_factory.static_file_provider();
if let Err(err) = static_file_provider.check_consistency(&tool.provider_factory.provider()?) {
warn!("Error checking consistency of static files: {err}");
}
let static_files = iter_static_files(static_file_provider.directory())?;
let ranges = static_files
.get(segment)
.ok_or_else(|| eyre::eyre!("No static files found for segment: {}", segment))?;
let start_time = Instant::now();
let mut hasher = checksum_hasher();
let mut total = 0usize;
let limit = limit.unwrap_or(usize::MAX);
let start_block = start_block.unwrap_or(0);
let end_block = end_block.unwrap_or(u64::MAX);
info!(
"Computing checksum for {} static files, start_block={}, end_block={}, limit={:?}",
segment,
start_block,
end_block,
if limit == usize::MAX { None } else { Some(limit) }
);
'outer: for (block_range, _header) in ranges.iter().sorted_by_key(|(range, _)| range.start()) {
if block_range.end() < start_block || block_range.start() > end_block {
continue;
}
let fixed_block_range = static_file_provider.find_fixed_range(segment, block_range.start());
let jar_provider = static_file_provider
.get_segment_provider_for_range(segment, || Some(fixed_block_range), None)?
.ok_or_else(|| {
eyre::eyre!(
"Failed to get segment provider for segment {} at range {}",
segment,
block_range
)
})?;
let mut cursor = jar_provider.cursor()?;
while let Ok(Some(row)) = cursor.next_row() {
for col_data in row.iter() {
hasher.write(col_data);
}
total += 1;
if total.is_multiple_of(PROGRESS_LOG_INTERVAL) {
info!("Hashed {total} entries.");
}
if total >= limit {
break 'outer;
}
}
// Explicitly drop provider before removing from cache to avoid deadlock
drop(jar_provider);
static_file_provider.remove_cached_provider(segment, fixed_block_range.end());
}
let checksum = hasher.finish();
let elapsed = start_time.elapsed();
info!(
"Checksum for static file segment `{}`: {:#x} ({} entries, elapsed: {:?})",
segment, checksum, total, elapsed
);
Ok(())
}
pub(crate) struct ChecksumViewer<'a, N: NodeTypesWithDB> {
tool: &'a DbTool<N>,
start_key: Option<String>,
@@ -102,7 +227,7 @@ impl<N: ProviderNodeTypes> TableViewer<(u64, Duration)> for ChecksumViewer<'_, N
};
let start_time = Instant::now();
let mut hasher = FixedState::with_seed(u64::from_be_bytes(*b"RETHRETH")).build_hasher();
let mut hasher = checksum_hasher();
let mut total = 0;
let limit = self.limit.unwrap_or(usize::MAX);
@@ -111,7 +236,7 @@ impl<N: ProviderNodeTypes> TableViewer<(u64, Duration)> for ChecksumViewer<'_, N
for (index, entry) in walker.enumerate() {
let (k, v): (RawKey<T::Key>, RawValue<T::Value>) = entry?;
if index.is_multiple_of(100_000) {
if index.is_multiple_of(PROGRESS_LOG_INTERVAL) {
info!("Hashed {index} entries.");
}

View File

@@ -39,7 +39,7 @@ pub enum Subcommands {
Stats(stats::Command),
/// Lists the contents of a table
List(list::Command),
/// Calculates the content checksum of a table
/// Calculates the content checksum of a table or static file segment
Checksum(checksum::Command),
/// Create a diff between two database tables or two entire databases.
Diff(diff::Command),