diff --git a/Cargo.lock b/Cargo.lock index e668dd6ad7..d091d564a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7381,6 +7381,7 @@ dependencies = [ "reth-discv5", "reth-downloaders", "reth-ecies", + "reth-era", "reth-era-downloader", "reth-era-utils", "reth-eth-wire", diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml index 548049bd7a..06ceb9423c 100644 --- a/crates/cli/commands/Cargo.toml +++ b/crates/cli/commands/Cargo.toml @@ -24,6 +24,7 @@ reth-db-common.workspace = true reth-downloaders.workspace = true reth-ecies.workspace = true reth-eth-wire.workspace = true +reth-era.workspace = true reth-era-downloader.workspace = true reth-era-utils.workspace = true reth-etl.workspace = true diff --git a/crates/cli/commands/src/export_era.rs b/crates/cli/commands/src/export_era.rs new file mode 100644 index 0000000000..dbedf1852e --- /dev/null +++ b/crates/cli/commands/src/export_era.rs @@ -0,0 +1,109 @@ +//! Command exporting block data to convert them to ERA1 files. + +use crate::common::{AccessRights, CliNodeTypes, Environment, EnvironmentArgs}; +use clap::{Args, Parser}; +use reth_chainspec::{EthChainSpec, EthereumHardforks}; +use reth_cli::chainspec::ChainSpecParser; +use reth_era::execution_types::MAX_BLOCKS_PER_ERA1; +use reth_era_utils as era1; +use reth_provider::DatabaseProviderFactory; +use std::{path::PathBuf, sync::Arc}; +use tracing::info; + +// Default folder name for era1 export files +const ERA1_EXPORT_FOLDER_NAME: &str = "era1-export"; + +#[derive(Debug, Parser)] +pub struct ExportEraCommand { + #[command(flatten)] + env: EnvironmentArgs, + + #[clap(flatten)] + export: ExportArgs, +} + +#[derive(Debug, Args)] +pub struct ExportArgs { + /// Optional first block number to export from the db. + /// It is by default 0. + #[arg(long, value_name = "first-block-number", verbatim_doc_comment)] + first_block_number: Option, + /// Optional last block number to export from the db. + /// It is by default 8191. 
+ #[arg(long, value_name = "last-block-number", verbatim_doc_comment)] + last_block_number: Option, + /// The maximum number of blocks per file, it can help you to decrease the size of the files. + /// Must be less than or equal to 8192. + #[arg(long, value_name = "max-blocks-per-file", verbatim_doc_comment)] + max_blocks_per_file: Option, + /// The directory path where to export era1 files. + /// The block data are read from the database. + #[arg(long, value_name = "EXPORT_ERA1_PATH", verbatim_doc_comment)] + path: Option, +} + +impl> ExportEraCommand { + /// Execute `export-era` command + pub async fn execute(self) -> eyre::Result<()> + where + N: CliNodeTypes, + { + let Environment { provider_factory, .. } = self.env.init::(AccessRights::RO)?; + + // Either specified path or default to `//era1-export/` + let data_dir = match &self.export.path { + Some(path) => path.clone(), + None => self + .env + .datadir + .resolve_datadir(self.env.chain.chain()) + .data_dir() + .join(ERA1_EXPORT_FOLDER_NAME), + }; + + let export_config = era1::ExportConfig { + network: self.env.chain.chain().to_string(), + first_block_number: self.export.first_block_number.unwrap_or(0), + last_block_number: self + .export + .last_block_number + .unwrap_or(MAX_BLOCKS_PER_ERA1 as u64 - 1), + max_blocks_per_file: self + .export + .max_blocks_per_file + .unwrap_or(MAX_BLOCKS_PER_ERA1 as u64), + dir: data_dir, + }; + + export_config.validate()?; + + info!( + target: "reth::cli", + "Starting ERA1 block export: blocks {}-{} to {}", + export_config.first_block_number, + export_config.last_block_number, + export_config.dir.display() + ); + + // Only read access is needed for the database provider + let provider = provider_factory.database_provider_ro()?; + + let exported_files = era1::export(&provider, &export_config)?; + + info!( + target: "reth::cli", + "Successfully exported {} ERA1 files to {}", + exported_files.len(), + export_config.dir.display() + ); + + Ok(()) + } +} + +impl ExportEraCommand { 
+ /// Returns the underlying chain being used to run this command + pub fn chain_spec(&self) -> Option<&Arc> { + Some(&self.env.chain) + } +} diff --git a/crates/cli/commands/src/lib.rs b/crates/cli/commands/src/lib.rs index ed57a55aae..bf4504074a 100644 --- a/crates/cli/commands/src/lib.rs +++ b/crates/cli/commands/src/lib.rs @@ -13,6 +13,7 @@ pub mod config_cmd; pub mod db; pub mod download; pub mod dump_genesis; +pub mod export_era; pub mod import; pub mod import_era; pub mod import_op; diff --git a/crates/era-utils/src/export.rs b/crates/era-utils/src/export.rs index 5ff1a0d78c..f76b3f82a1 100644 --- a/crates/era-utils/src/export.rs +++ b/crates/era-utils/src/export.rs @@ -18,7 +18,7 @@ use std::{ path::PathBuf, time::{Duration, Instant}, }; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; const REPORT_INTERVAL_SECS: u64 = 10; const ENTRY_HEADER_SIZE: usize = 8; @@ -38,7 +38,7 @@ pub struct ExportConfig { /// It can never be larger than `MAX_BLOCKS_PER_ERA1 = 8192` /// See also <`https://github.com/eth-clients/e2store-format-specs/blob/main/formats/era1.md`> pub max_blocks_per_file: u64, - /// Network name + /// Network name. 
pub network: String, } @@ -133,7 +133,19 @@ where let headers = provider.headers_range(start_block..=end_block)?; - let era1_id = Era1Id::new(&config.network, start_block, block_count as u32); + // Extract first 4 bytes of last block's state root as historical identifier + let historical_root = headers + .last() + .map(|header| { + let state_root = header.state_root(); + [state_root[0], state_root[1], state_root[2], state_root[3]] + }) + .unwrap_or([0u8; 4]); + + let era1_id = Era1Id::new(&config.network, start_block, block_count as u32) + .with_hash(historical_root); + + debug!("Final file name {}", era1_id.to_file_name()); let file_path = config.dir.join(era1_id.to_file_name()); let file = std::fs::File::create(&file_path)?; let mut writer = Era1Writer::new(file); diff --git a/crates/era-utils/tests/it/genesis.rs b/crates/era-utils/tests/it/genesis.rs index dacef15eea..0c35c458aa 100644 --- a/crates/era-utils/tests/it/genesis.rs +++ b/crates/era-utils/tests/it/genesis.rs @@ -23,7 +23,10 @@ fn test_export_with_genesis_only() { let file_path = &exported_files[0]; assert!(file_path.exists(), "Exported file should exist on disk"); let file_name = file_path.file_name().unwrap().to_str().unwrap(); - assert!(file_name.starts_with("mainnet-0-"), "File should have correct prefix"); + assert!( + file_name.starts_with("mainnet-00000-00001-"), + "File should have correct prefix with era format" + ); assert!(file_name.ends_with(".era1"), "File should have correct extension"); let metadata = fs::metadata(file_path).unwrap(); assert!(metadata.len() > 0, "Exported file should not be empty"); diff --git a/crates/era-utils/tests/it/history.rs b/crates/era-utils/tests/it/history.rs index 4811e72953..8e720f1001 100644 --- a/crates/era-utils/tests/it/history.rs +++ b/crates/era-utils/tests/it/history.rs @@ -1,6 +1,7 @@ use crate::{ClientWithFakeIndex, ITHACA_ERA_INDEX_URL}; use reqwest::{Client, Url}; use reth_db_common::init::init_genesis; +use 
reth_era::execution_types::MAX_BLOCKS_PER_ERA1; use reth_era_downloader::{EraClient, EraStream, EraStreamConfig}; use reth_era_utils::{export, import, ExportConfig}; use reth_etl::Collector; @@ -129,10 +130,30 @@ async fn test_roundtrip_export_after_import() { blocks_numbers_per_file ); - // Verify exact ERA1 naming convention: `mainnet-{start_block}-{block_count}.era1` + // Verify format: mainnet-{era_number:05}-{era_count:05}-{8hexchars}.era1 + let era_number = file_start_block / MAX_BLOCKS_PER_ERA1 as u64; + + // Era count is always 1 for this test, as we are only exporting one era + let expected_prefix = format!("mainnet-{:05}-{:05}-", era_number, 1); + let file_name = file_path.file_name().unwrap().to_str().unwrap(); - let expected_filename = - format!("mainnet-{file_start_block}-{blocks_numbers_per_file}.era1"); - assert_eq!(file_name, expected_filename, "File {} should have correct name", i + 1); + assert!( + file_name.starts_with(&expected_prefix), + "File {} should start with '{expected_prefix}', got '{file_name}'", + i + 1 + ); + + // Verify the hash part is 8 characters + let hash_start = expected_prefix.len(); + let hash_end = file_name.len() - 5; // remove ".era1" + let hash_part = &file_name[hash_start..hash_end]; + assert_eq!( + hash_part.len(), + 8, + "File {} hash should be 8 characters, got {} in '{}'", + i + 1, + hash_part.len(), + file_name + ); } } diff --git a/crates/era/src/era1_types.rs b/crates/era/src/era1_types.rs index 135f7225f6..3078f95297 100644 --- a/crates/era/src/era1_types.rs +++ b/crates/era/src/era1_types.rs @@ -4,7 +4,7 @@ use crate::{ e2s_types::{E2sError, Entry}, - execution_types::{Accumulator, BlockTuple}, + execution_types::{Accumulator, BlockTuple, MAX_BLOCKS_PER_ERA1}, }; use alloy_primitives::BlockNumber; @@ -155,6 +155,7 @@ pub struct Era1Id { pub block_count: u32, /// Optional hash identifier for this file + /// First 4 bytes of the last historical root in the last state in the era file pub hash: Option<[u8; 4]>, } @@ 
-174,24 +175,38 @@ impl Era1Id {
         self
     }
 
-    /// Convert to file name following the era1 file naming:
-    /// `--.era1`
-    /// inspired from era file naming convention in
+    /// Convert to file name following the era file naming:
+    /// `<config-name>-<era-number>-<era-count>-<short-historical-root>.era(1)`
     ///
     /// See also
     pub fn to_file_name(&self) -> String {
+        // Find which era the first block belongs to
+        let era_number = self.start_block / MAX_BLOCKS_PER_ERA1 as u64;
+        let era_count = self.calculate_era_count(era_number);
         if let Some(hash) = self.hash {
-            // Format with zero-padded era number and hash:
-            // For example network-00000-5ec1ffb8.era1
             format!(
-                "{}-{:05}-{:02x}{:02x}{:02x}{:02x}.era1",
-                self.network_name, self.start_block, hash[0], hash[1], hash[2], hash[3]
+                "{}-{:05}-{:05}-{:02x}{:02x}{:02x}{:02x}.era1",
+                self.network_name, era_number, era_count, hash[0], hash[1], hash[2], hash[3]
             )
         } else {
-            // Original format without hash
-            format!("{}-{}-{}.era1", self.network_name, self.start_block, self.block_count)
+            // era spec format with placeholder hash when no hash available
+            // Format: `<config-name>-<era-number>-<era-count>-00000000.era1`
+            format!("{}-{:05}-{:05}-00000000.era1", self.network_name, era_number, era_count)
         }
     }
+
+    /// Number of eras this file spans: from `first_era` (the era containing `start_block`)
+    /// through the era containing the last block of the range, inclusive.
+    /// Usually 1; with at most 8192 blocks per era1 file it can never exceed 2.
+    /// An empty range (`block_count == 0`) is treated as a single block, so the count stays 1.
+    const fn calculate_era_count(&self, first_era: u64) -> u64 {
+        // Offset of the last block from `start_block`; 0 for an empty range (no underflow).
+        let last_offset = (self.block_count as u64).saturating_sub(1);
+        // Saturating add guards against overflow for ranges ending near `u64::MAX`.
+        let last_block = self.start_block.saturating_add(last_offset);
+        // Era of the last block, minus the first era, plus one for the inclusive count.
+        last_block / MAX_BLOCKS_PER_ERA1 as u64 - first_era + 1
+    }
 }
 
 #[cfg(test)]
@@ -330,33 +345,33 @@ mod tests {
 
     #[test_case::test_case(
         Era1Id::new("mainnet", 0, 8192).with_hash([0x5e, 0xc1, 0xff, 0xb8]),
-        "mainnet-00000-5ec1ffb8.era1";
-        "Mainnet 00000"
+        "mainnet-00000-00001-5ec1ffb8.era1";
+        "Mainnet era 0"
     )]
     #[test_case::test_case(
-        Era1Id::new("mainnet", 12, 8192).with_hash([0x5e, 0xcb, 0x9b, 0xf9]),
-        "mainnet-00012-5ecb9bf9.era1";
-        "Mainnet 00012"
+        Era1Id::new("mainnet", 8192, 8192).with_hash([0x5e, 0xcb, 0x9b, 0xf9]),
+        "mainnet-00001-00001-5ecb9bf9.era1";
+        "Mainnet era 1"
     )]
     #[test_case::test_case(
-        Era1Id::new("sepolia", 5, 8192).with_hash([0x90, 0x91, 0x84, 0x72]),
-        "sepolia-00005-90918472.era1";
-        "Sepolia 00005"
+        Era1Id::new("sepolia", 0, 8192).with_hash([0x90, 0x91, 0x84, 0x72]),
+        "sepolia-00000-00001-90918472.era1";
+        "Sepolia era 0"
     )]
     #[test_case::test_case(
-        Era1Id::new("sepolia", 19, 8192).with_hash([0xfa, 0x77, 0x00, 0x19]),
-        "sepolia-00019-fa770019.era1";
-        "Sepolia 00019"
+        Era1Id::new("sepolia", 155648, 8192).with_hash([0xfa, 0x77, 0x00, 0x19]),
+        "sepolia-00019-00001-fa770019.era1";
+        "Sepolia era 19"
     )]
     #[test_case::test_case(
         Era1Id::new("mainnet", 1000, 100),
-        "mainnet-1000-100.era1";
+        "mainnet-00000-00001-00000000.era1";
         "ID without hash"
     )]
     #[test_case::test_case(
-        Era1Id::new("sepolia", 12345, 8192).with_hash([0xab, 0xcd, 0xef, 0x12]),
-        "sepolia-12345-abcdef12.era1";
-        "Large block number"
+        Era1Id::new("sepolia", 101130240, 8192).with_hash([0xab, 0xcd, 0xef, 0x12]),
+        "sepolia-12345-00001-abcdef12.era1";
+        "Large block number era 12345"
     )]
     fn test_era1id_file_naming(id: Era1Id, expected_file_name: &str) {
         let actual_file_name =
id.to_file_name(); diff --git a/crates/ethereum/cli/src/interface.rs b/crates/ethereum/cli/src/interface.rs index 3d89c1317e..f4920eff4b 100644 --- a/crates/ethereum/cli/src/interface.rs +++ b/crates/ethereum/cli/src/interface.rs @@ -5,7 +5,7 @@ use clap::{Parser, Subcommand}; use reth_chainspec::ChainSpec; use reth_cli::chainspec::ChainSpecParser; use reth_cli_commands::{ - config_cmd, db, download, dump_genesis, import, import_era, init_cmd, init_state, + config_cmd, db, download, dump_genesis, export_era, import, import_era, init_cmd, init_state, launcher::FnLauncher, node::{self, NoArgs}, p2p, prune, re_execute, recover, stage, @@ -166,6 +166,9 @@ impl, Ext: clap::Args + fmt::Debug> Cl Commands::ImportEra(command) => { runner.run_blocking_until_ctrl_c(command.execute::()) } + Commands::ExportEra(command) => { + runner.run_blocking_until_ctrl_c(command.execute::()) + } Commands::DumpGenesis(command) => runner.run_blocking_until_ctrl_c(command.execute()), Commands::Db(command) => { runner.run_blocking_until_ctrl_c(command.execute::()) @@ -221,6 +224,9 @@ pub enum Commands { /// This syncs ERA encoded blocks from a directory. #[command(name = "import-era")] ImportEra(import_era::ImportEraCommand), + /// Exports block to era1 files in a specified directory. + #[command(name = "export-era")] + ExportEra(export_era::ExportEraCommand), /// Dumps genesis block JSON configuration to stdout. 
DumpGenesis(dump_genesis::DumpGenesisCommand), /// Database debugging utilities @@ -264,6 +270,7 @@ impl Commands { Self::Init(cmd) => cmd.chain_spec(), Self::InitState(cmd) => cmd.chain_spec(), Self::Import(cmd) => cmd.chain_spec(), + Self::ExportEra(cmd) => cmd.chain_spec(), Self::ImportEra(cmd) => cmd.chain_spec(), Self::DumpGenesis(cmd) => cmd.chain_spec(), Self::Db(cmd) => cmd.chain_spec(), diff --git a/docs/vocs/docs/pages/cli/SUMMARY.mdx b/docs/vocs/docs/pages/cli/SUMMARY.mdx index 44d7408253..fff16ea582 100644 --- a/docs/vocs/docs/pages/cli/SUMMARY.mdx +++ b/docs/vocs/docs/pages/cli/SUMMARY.mdx @@ -4,6 +4,7 @@ - [`reth init-state`](/cli/reth/init-state) - [`reth import`](/cli/reth/import) - [`reth import-era`](/cli/reth/import-era) + - [`reth export-era`](/cli/reth/export-era) - [`reth dump-genesis`](/cli/reth/dump-genesis) - [`reth db`](/cli/reth/db) - [`reth db stats`](/cli/reth/db/stats) diff --git a/docs/vocs/docs/pages/cli/reth.mdx b/docs/vocs/docs/pages/cli/reth.mdx index 031fe62f46..04775950b2 100644 --- a/docs/vocs/docs/pages/cli/reth.mdx +++ b/docs/vocs/docs/pages/cli/reth.mdx @@ -14,6 +14,7 @@ Commands: init-state Initialize the database from a state dump file import This syncs RLP encoded blocks from a file import-era This syncs ERA encoded blocks from a directory + export-era Exports block to era1 files in a specified directory dump-genesis Dumps genesis block JSON configuration to stdout db Database debugging utilities download Download public node snapshots diff --git a/docs/vocs/docs/pages/cli/reth/export-era.mdx b/docs/vocs/docs/pages/cli/reth/export-era.mdx new file mode 100644 index 0000000000..165970638b --- /dev/null +++ b/docs/vocs/docs/pages/cli/reth/export-era.mdx @@ -0,0 +1,162 @@ +# reth export-era + +Exports block to era1 files in a specified directory + +```bash +$ reth export-era --help +``` +```txt +Usage: reth export-era [OPTIONS] + +Options: + -h, --help + Print help (see a summary with '-h') + +Datadir: + --datadir + The path 
to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --datadir.static-files + The absolute path to store static files in. + + --config + The path to the configuration file to use + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, holesky, hoodi, dev + + [default: mainnet] + +Database: + --db.log-level + Database logging level. Levels higher than "notice" require a debug build + + Possible values: + - fatal: Enables logging for critical conditions, i.e. assertion failures + - error: Enables logging for error conditions + - warn: Enables logging for warning conditions + - notice: Enables logging for normal but significant condition + - verbose: Enables logging for verbose informational + - debug: Enables logging for debug-level messages + - trace: Enables logging for trace debug-level messages + - extra: Enables logging for extra debug-level messages + + --db.exclusive + Open environment in exclusive/monopolistic mode. Makes it possible to open a database on an NFS volume + + [possible values: true, false] + + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + + --db.read-transaction-timeout + Read transaction timeout in seconds, 0 means no timeout + + --first-block-number + Optional first block number to export from the db. + It is by default 0. + + --last-block-number + Optional last block number to export from the db. + It is by default 8191. + + --max-blocks-per-file + The maximum number of blocks per file, it can help you to decrease the size of the files. + Must be less than or equal to 8192. 
+ + --path + The directory path where to export era1 files. + The block data are read from the database. + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. 
+ + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output +``` \ No newline at end of file diff --git a/docs/vocs/sidebar.ts b/docs/vocs/sidebar.ts index 65829d8e48..140b056e0a 100644 --- a/docs/vocs/sidebar.ts +++ b/docs/vocs/sidebar.ts @@ -313,6 +313,10 @@ export const sidebar: SidebarItem[] = [ text: "reth import-era", link: "/cli/reth/import-era" }, + { + text: "reth export-era", + link: "/cli/reth/export-era" + }, { text: "reth dump-genesis", link: "/cli/reth/dump-genesis"