From 26f4aab2a94fec7ec70d30b12cf677edd46754d2 Mon Sep 17 00:00:00 2001 From: Georgios Konstantopoulos Date: Wed, 4 Mar 2026 13:32:45 -0800 Subject: [PATCH] feat(download): modular snapshot downloads with interactive TUI and config generation (#22246) Co-authored-by: Amp Co-authored-by: Dan Cline <6798349+Rjected@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 Co-authored-by: Derek Cofausper <256792747+decofe@users.noreply.github.com> --- Cargo.lock | 22 + Cargo.toml | 1 + crates/cli/commands/Cargo.toml | 6 +- crates/cli/commands/src/download.rs | 630 ------ .../cli/commands/src/download/config_gen.rs | 609 ++++++ crates/cli/commands/src/download/manifest.rs | 1091 ++++++++++ .../cli/commands/src/download/manifest_cmd.rs | 232 ++ crates/cli/commands/src/download/mod.rs | 1891 +++++++++++++++++ crates/cli/commands/src/download/tui.rs | 437 ++++ crates/ethereum/cli/src/app.rs | 1 + crates/ethereum/cli/src/interface.rs | 8 +- docs/vocs/docs/pages/cli/SUMMARY.mdx | 1 + docs/vocs/docs/pages/cli/reth.mdx | 31 +- docs/vocs/docs/pages/cli/reth/download.mdx | 44 + .../docs/pages/cli/reth/snapshot-manifest.mdx | 180 ++ docs/vocs/sidebar-cli-reth.ts | 4 + 16 files changed, 4540 insertions(+), 648 deletions(-) delete mode 100644 crates/cli/commands/src/download.rs create mode 100644 crates/cli/commands/src/download/config_gen.rs create mode 100644 crates/cli/commands/src/download/manifest.rs create mode 100644 crates/cli/commands/src/download/manifest_cmd.rs create mode 100644 crates/cli/commands/src/download/mod.rs create mode 100644 crates/cli/commands/src/download/tui.rs create mode 100644 docs/vocs/docs/pages/cli/reth/snapshot-manifest.mdx diff --git a/Cargo.lock b/Cargo.lock index 257dca31c1..9cd8bd87d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1711,6 +1711,20 @@ dependencies = [ "wyz", ] +[[package]] +name = "blake3" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -2540,6 +2554,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + [[package]] name = "convert_case" version = "0.10.0" @@ -7732,6 +7752,7 @@ dependencies = [ "alloy-rlp", "arbitrary", "backon", + "blake3", "clap", "comfy-table", "crossterm", @@ -7747,6 +7768,7 @@ dependencies = [ "proptest", "proptest-arbitrary-interop", "ratatui", + "rayon", "reqwest", "reth-chainspec", "reth-cli", diff --git a/Cargo.toml b/Cargo.toml index 5ac38bd5a8..cde4c4dd14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -504,6 +504,7 @@ bincode = "1.3" bitflags = "2.4" boyer-moore-magiclen = "0.2.16" bytes = { version = "1.11.1", default-features = false } +blake3 = "1.8" brotli = "8" cfg-if = "1.0" clap = "4" diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml index e70b27dc85..a572ebbf9d 100644 --- a/crates/cli/commands/Cargo.toml +++ b/crates/cli/commands/Cargo.toml @@ -46,7 +46,7 @@ reth-prune.workspace = true reth-prune-types.workspace = true reth-revm.workspace = true reth-stages.workspace = true -reth-stages-types = { workspace = true, optional = true } +reth-stages-types.workspace = true reth-static-file-types = { workspace = true, features = ["clap"] } reth-static-file.workspace = true reth-tasks.workspace = true @@ -87,6 +87,8 @@ tokio-stream.workspace = true reqwest.workspace = true url.workspace = true metrics.workspace = true +blake3.workspace = true +rayon.workspace = true # io fdlimit.workspace = true @@ -127,7 +129,7 @@ arbitrary = [ "reth-trie-common?/test-utils", "reth-codecs/arbitrary", "reth-prune-types/arbitrary", - 
"reth-stages-types?/arbitrary", + "reth-stages-types/arbitrary", "reth-trie-common?/arbitrary", "alloy-consensus/arbitrary", "reth-primitives-traits/arbitrary", diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs deleted file mode 100644 index 9573427dba..0000000000 --- a/crates/cli/commands/src/download.rs +++ /dev/null @@ -1,630 +0,0 @@ -use crate::common::EnvironmentArgs; -use clap::Parser; -use eyre::Result; -use lz4::Decoder; -use reqwest::{blocking::Client as BlockingClient, header::RANGE, Client, StatusCode}; -use reth_chainspec::{EthChainSpec, EthereumHardforks}; -use reth_cli::chainspec::ChainSpecParser; -use reth_fs_util as fs; -use std::{ - borrow::Cow, - fs::OpenOptions, - io::{self, BufWriter, Read, Write}, - path::{Path, PathBuf}, - sync::{Arc, OnceLock}, - time::{Duration, Instant}, -}; -use tar::Archive; -use tokio::task; -use tracing::info; -use url::Url; -use zstd::stream::read::Decoder as ZstdDecoder; - -const BYTE_UNITS: [&str; 4] = ["B", "KB", "MB", "GB"]; -const MERKLE_BASE_URL: &str = "https://downloads.merkle.io"; -const EXTENSION_TAR_LZ4: &str = ".tar.lz4"; -const EXTENSION_TAR_ZSTD: &str = ".tar.zst"; - -/// Global static download defaults -static DOWNLOAD_DEFAULTS: OnceLock = OnceLock::new(); - -/// Download configuration defaults -/// -/// Global defaults can be set via [`DownloadDefaults::try_init`]. -#[derive(Debug, Clone)] -pub struct DownloadDefaults { - /// List of available snapshot sources - pub available_snapshots: Vec>, - /// Default base URL for snapshots - pub default_base_url: Cow<'static, str>, - /// Default base URL for chain-aware snapshots. - /// - /// When set, the chain ID is appended to form the full URL: `{base_url}/{chain_id}`. - /// For example, given a base URL of `https://snapshots.example.com` and chain ID `1`, - /// the resulting URL would be `https://snapshots.example.com/1`. - /// - /// Falls back to [`default_base_url`](Self::default_base_url) when `None`. 
- pub default_chain_aware_base_url: Option>, - /// Optional custom long help text that overrides the generated help - pub long_help: Option, -} - -impl DownloadDefaults { - /// Initialize the global download defaults with this configuration - pub fn try_init(self) -> Result<(), Self> { - DOWNLOAD_DEFAULTS.set(self) - } - - /// Get a reference to the global download defaults - pub fn get_global() -> &'static DownloadDefaults { - DOWNLOAD_DEFAULTS.get_or_init(DownloadDefaults::default_download_defaults) - } - - /// Default download configuration with defaults from merkle.io and publicnode - pub fn default_download_defaults() -> Self { - Self { - available_snapshots: vec![ - Cow::Borrowed("https://www.merkle.io/snapshots (default, mainnet archive)"), - Cow::Borrowed("https://publicnode.com/snapshots (full nodes & testnets)"), - ], - default_base_url: Cow::Borrowed(MERKLE_BASE_URL), - default_chain_aware_base_url: None, - long_help: None, - } - } - - /// Generates the long help text for the download URL argument using these defaults. - /// - /// If a custom long_help is set, it will be returned. Otherwise, help text is generated - /// from the available_snapshots list. 
- pub fn long_help(&self) -> String { - if let Some(ref custom_help) = self.long_help { - return custom_help.clone(); - } - - let mut help = String::from( - "Specify a snapshot URL or let the command propose a default one.\n\nAvailable snapshot sources:\n", - ); - - for source in &self.available_snapshots { - help.push_str("- "); - help.push_str(source); - help.push('\n'); - } - - help.push_str( - "\nIf no URL is provided, the latest archive snapshot for the selected chain\nwill be proposed for download from ", - ); - help.push_str( - self.default_chain_aware_base_url.as_deref().unwrap_or(&self.default_base_url), - ); - help.push_str( - ".\n\nLocal file:// URLs are also supported for extracting snapshots from disk.", - ); - help - } - - /// Add a snapshot source to the list - pub fn with_snapshot(mut self, source: impl Into>) -> Self { - self.available_snapshots.push(source.into()); - self - } - - /// Replace all snapshot sources - pub fn with_snapshots(mut self, sources: Vec>) -> Self { - self.available_snapshots = sources; - self - } - - /// Set the default base URL, e.g. `https://downloads.merkle.io`. - pub fn with_base_url(mut self, url: impl Into>) -> Self { - self.default_base_url = url.into(); - self - } - - /// Set the default chain-aware base URL. 
- pub fn with_chain_aware_base_url(mut self, url: impl Into>) -> Self { - self.default_chain_aware_base_url = Some(url.into()); - self - } - - /// Builder: Set custom long help text, overriding the generated help - pub fn with_long_help(mut self, help: impl Into) -> Self { - self.long_help = Some(help.into()); - self - } -} - -impl Default for DownloadDefaults { - fn default() -> Self { - Self::default_download_defaults() - } -} - -#[derive(Debug, Parser)] -pub struct DownloadCommand { - #[command(flatten)] - env: EnvironmentArgs, - - /// Custom URL to download the snapshot from - #[arg(long, short, long_help = DownloadDefaults::get_global().long_help())] - url: Option, -} - -impl> DownloadCommand { - pub async fn execute(self) -> Result<()> { - let data_dir = self.env.datadir.resolve_datadir(self.env.chain.chain()); - fs::create_dir_all(&data_dir)?; - - let url = match self.url { - Some(url) => url, - None => { - let url = get_latest_snapshot_url(self.env.chain.chain().id()).await?; - info!(target: "reth::cli", "Using default snapshot URL: {}", url); - url - } - }; - - info!(target: "reth::cli", - chain = %self.env.chain.chain(), - dir = ?data_dir.data_dir(), - url = %url, - "Starting snapshot download and extraction" - ); - - stream_and_extract(&url, data_dir.data_dir()).await?; - info!(target: "reth::cli", "Snapshot downloaded and extracted successfully"); - - Ok(()) - } -} - -impl DownloadCommand { - /// Returns the underlying chain being used to run this command - pub fn chain_spec(&self) -> Option<&Arc> { - Some(&self.env.chain) - } -} - -// Monitor process status and display progress every 100ms -// to avoid overwhelming stdout -struct DownloadProgress { - downloaded: u64, - total_size: u64, - last_displayed: Instant, - started_at: Instant, -} - -impl DownloadProgress { - /// Creates new progress tracker with given total size - fn new(total_size: u64) -> Self { - let now = Instant::now(); - Self { downloaded: 0, total_size, last_displayed: now, started_at: 
now } - } - - /// Converts bytes to human readable format (B, KB, MB, GB) - fn format_size(size: u64) -> String { - let mut size = size as f64; - let mut unit_index = 0; - - while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 { - size /= 1024.0; - unit_index += 1; - } - - format!("{:.2} {}", size, BYTE_UNITS[unit_index]) - } - - /// Format duration as human readable string - fn format_duration(duration: Duration) -> String { - let secs = duration.as_secs(); - if secs < 60 { - format!("{secs}s") - } else if secs < 3600 { - format!("{}m {}s", secs / 60, secs % 60) - } else { - format!("{}h {}m", secs / 3600, (secs % 3600) / 60) - } - } - - /// Updates progress bar - fn update(&mut self, chunk_size: u64) -> Result<()> { - self.downloaded += chunk_size; - - // Only update display at most 10 times per second for efficiency - if self.last_displayed.elapsed() >= Duration::from_millis(100) { - let formatted_downloaded = Self::format_size(self.downloaded); - let formatted_total = Self::format_size(self.total_size); - let progress = (self.downloaded as f64 / self.total_size as f64) * 100.0; - - // Calculate ETA based on current speed - let elapsed = self.started_at.elapsed(); - let eta = if self.downloaded > 0 { - let remaining = self.total_size.saturating_sub(self.downloaded); - let speed = self.downloaded as f64 / elapsed.as_secs_f64(); - if speed > 0.0 { - Duration::from_secs_f64(remaining as f64 / speed) - } else { - Duration::ZERO - } - } else { - Duration::ZERO - }; - let eta_str = Self::format_duration(eta); - - // Pad with spaces to clear any previous longer line - print!( - "\rDownloading and extracting... 
{progress:.2}% ({formatted_downloaded} / {formatted_total}) ETA: {eta_str} ", - ); - io::stdout().flush()?; - self.last_displayed = Instant::now(); - } - - Ok(()) - } -} - -/// Adapter to track progress while reading -struct ProgressReader { - reader: R, - progress: DownloadProgress, -} - -impl ProgressReader { - fn new(reader: R, total_size: u64) -> Self { - Self { reader, progress: DownloadProgress::new(total_size) } - } -} - -impl Read for ProgressReader { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let bytes = self.reader.read(buf)?; - if bytes > 0 && - let Err(e) = self.progress.update(bytes as u64) - { - return Err(io::Error::other(e)); - } - Ok(bytes) - } -} - -/// Supported compression formats for snapshots -#[derive(Debug, Clone, Copy)] -enum CompressionFormat { - Lz4, - Zstd, -} - -impl CompressionFormat { - /// Detect compression format from file extension - fn from_url(url: &str) -> Result { - let path = - Url::parse(url).map(|u| u.path().to_string()).unwrap_or_else(|_| url.to_string()); - - if path.ends_with(EXTENSION_TAR_LZ4) { - Ok(Self::Lz4) - } else if path.ends_with(EXTENSION_TAR_ZSTD) { - Ok(Self::Zstd) - } else { - Err(eyre::eyre!( - "Unsupported file format. Expected .tar.lz4 or .tar.zst, got: {}", - path - )) - } - } -} - -/// Extracts a compressed tar archive to the target directory with progress tracking. -fn extract_archive( - reader: R, - total_size: u64, - format: CompressionFormat, - target_dir: &Path, -) -> Result<()> { - let progress_reader = ProgressReader::new(reader, total_size); - - match format { - CompressionFormat::Lz4 => { - let decoder = Decoder::new(progress_reader)?; - Archive::new(decoder).unpack(target_dir)?; - } - CompressionFormat::Zstd => { - let decoder = ZstdDecoder::new(progress_reader)?; - Archive::new(decoder).unpack(target_dir)?; - } - } - - info!(target: "reth::cli", "Extraction complete."); - Ok(()) -} - -/// Extracts a snapshot from a local file. 
-fn extract_from_file(path: &Path, format: CompressionFormat, target_dir: &Path) -> Result<()> { - let file = std::fs::File::open(path)?; - let total_size = file.metadata()?.len(); - extract_archive(file, total_size, format, target_dir) -} - -const MAX_DOWNLOAD_RETRIES: u32 = 10; -const RETRY_BACKOFF_SECS: u64 = 5; - -/// Wrapper that tracks download progress while writing data. -/// Used with [`io::copy`] to display progress during downloads. -struct ProgressWriter { - inner: W, - progress: DownloadProgress, -} - -impl Write for ProgressWriter { - fn write(&mut self, buf: &[u8]) -> io::Result { - let n = self.inner.write(buf)?; - let _ = self.progress.update(n as u64); - Ok(n) - } - - fn flush(&mut self) -> io::Result<()> { - self.inner.flush() - } -} - -/// Downloads a file with resume support using HTTP Range requests. -/// Automatically retries on failure, resuming from where it left off. -/// Returns the path to the downloaded file and its total size. -fn resumable_download(url: &str, target_dir: &Path) -> Result<(PathBuf, u64)> { - let file_name = Url::parse(url) - .ok() - .and_then(|u| u.path_segments()?.next_back().map(|s| s.to_string())) - .unwrap_or_else(|| "snapshot.tar".to_string()); - - let final_path = target_dir.join(&file_name); - let part_path = target_dir.join(format!("{file_name}.part")); - - let client = BlockingClient::builder().timeout(Duration::from_secs(30)).build()?; - - let mut total_size: Option = None; - let mut last_error: Option = None; - - let finalize_download = |size: u64| -> Result<(PathBuf, u64)> { - fs::rename(&part_path, &final_path)?; - info!(target: "reth::cli", "Download complete: {}", final_path.display()); - Ok((final_path.clone(), size)) - }; - - for attempt in 1..=MAX_DOWNLOAD_RETRIES { - let existing_size = fs::metadata(&part_path).map(|m| m.len()).unwrap_or(0); - - if let Some(total) = total_size && - existing_size >= total - { - return finalize_download(total); - } - - if attempt > 1 { - info!(target: "reth::cli", - 
"Retry attempt {}/{} - resuming from {} bytes", - attempt, MAX_DOWNLOAD_RETRIES, existing_size - ); - } - - let mut request = client.get(url); - if existing_size > 0 { - request = request.header(RANGE, format!("bytes={existing_size}-")); - if attempt == 1 { - info!(target: "reth::cli", "Resuming download from {} bytes", existing_size); - } - } - - let response = match request.send().and_then(|r| r.error_for_status()) { - Ok(r) => r, - Err(e) => { - last_error = Some(e.into()); - if attempt < MAX_DOWNLOAD_RETRIES { - info!(target: "reth::cli", - "Download failed, retrying in {} seconds...", RETRY_BACKOFF_SECS - ); - std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); - } - continue; - } - }; - - let is_partial = response.status() == StatusCode::PARTIAL_CONTENT; - - let size = if is_partial { - response - .headers() - .get("Content-Range") - .and_then(|v| v.to_str().ok()) - .and_then(|v| v.split('/').next_back()) - .and_then(|v| v.parse().ok()) - } else { - response.content_length() - }; - - if total_size.is_none() { - total_size = size; - } - - let current_total = total_size.ok_or_else(|| { - eyre::eyre!("Server did not provide Content-Length or Content-Range header") - })?; - - let file = if is_partial && existing_size > 0 { - OpenOptions::new() - .append(true) - .open(&part_path) - .map_err(|e| fs::FsPathError::open(e, &part_path))? - } else { - fs::create_file(&part_path)? 
- }; - - let start_offset = if is_partial { existing_size } else { 0 }; - let mut progress = DownloadProgress::new(current_total); - progress.downloaded = start_offset; - - let mut writer = ProgressWriter { inner: BufWriter::new(file), progress }; - let mut reader = response; - - let copy_result = io::copy(&mut reader, &mut writer); - let flush_result = writer.inner.flush(); - println!(); - - if let Err(e) = copy_result.and(flush_result) { - last_error = Some(e.into()); - if attempt < MAX_DOWNLOAD_RETRIES { - info!(target: "reth::cli", - "Download interrupted, retrying in {} seconds...", RETRY_BACKOFF_SECS - ); - std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); - } - continue; - } - - return finalize_download(current_total); - } - - Err(last_error - .unwrap_or_else(|| eyre::eyre!("Download failed after {} attempts", MAX_DOWNLOAD_RETRIES))) -} - -/// Fetches the snapshot from a remote URL with resume support, then extracts it. -fn download_and_extract(url: &str, format: CompressionFormat, target_dir: &Path) -> Result<()> { - let (downloaded_path, total_size) = resumable_download(url, target_dir)?; - - info!(target: "reth::cli", "Extracting snapshot..."); - let file = fs::open(&downloaded_path)?; - extract_archive(file, total_size, format, target_dir)?; - - fs::remove_file(&downloaded_path)?; - info!(target: "reth::cli", "Removed downloaded archive"); - - Ok(()) -} - -/// Downloads and extracts a snapshot, blocking until finished. -/// -/// Supports both `file://` URLs for local files and HTTP(S) URLs for remote downloads. 
-fn blocking_download_and_extract(url: &str, target_dir: &Path) -> Result<()> { - let format = CompressionFormat::from_url(url)?; - - if let Ok(parsed_url) = Url::parse(url) && - parsed_url.scheme() == "file" - { - let file_path = parsed_url - .to_file_path() - .map_err(|_| eyre::eyre!("Invalid file:// URL path: {}", url))?; - extract_from_file(&file_path, format, target_dir) - } else { - download_and_extract(url, format, target_dir) - } -} - -async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> { - let target_dir = target_dir.to_path_buf(); - let url = url.to_string(); - task::spawn_blocking(move || blocking_download_and_extract(&url, &target_dir)).await??; - - Ok(()) -} - -// Builds default URL for latest mainnet archive snapshot using configured defaults -async fn get_latest_snapshot_url(chain_id: u64) -> Result { - let defaults = DownloadDefaults::get_global(); - let base_url = match &defaults.default_chain_aware_base_url { - Some(url) => format!("{url}/{chain_id}"), - None => defaults.default_base_url.to_string(), - }; - let latest_url = format!("{base_url}/latest.txt"); - let filename = Client::new() - .get(latest_url) - .send() - .await? - .error_for_status()? - .text() - .await? 
- .trim() - .to_string(); - - Ok(format!("{base_url}/{filename}")) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_download_defaults_builder() { - let defaults = DownloadDefaults::default() - .with_snapshot("https://example.com/snapshots (example)") - .with_base_url("https://example.com"); - - assert_eq!(defaults.default_base_url, "https://example.com"); - assert_eq!(defaults.available_snapshots.len(), 3); // 2 defaults + 1 added - } - - #[test] - fn test_download_defaults_replace_snapshots() { - let defaults = DownloadDefaults::default().with_snapshots(vec![ - Cow::Borrowed("https://custom1.com"), - Cow::Borrowed("https://custom2.com"), - ]); - - assert_eq!(defaults.available_snapshots.len(), 2); - assert_eq!(defaults.available_snapshots[0], "https://custom1.com"); - } - - #[test] - fn test_long_help_generation() { - let defaults = DownloadDefaults::default(); - let help = defaults.long_help(); - - assert!(help.contains("Available snapshot sources:")); - assert!(help.contains("merkle.io")); - assert!(help.contains("publicnode.com")); - assert!(help.contains("file://")); - } - - #[test] - fn test_long_help_override() { - let custom_help = "This is custom help text for downloading snapshots."; - let defaults = DownloadDefaults::default().with_long_help(custom_help); - - let help = defaults.long_help(); - assert_eq!(help, custom_help); - assert!(!help.contains("Available snapshot sources:")); - } - - #[test] - fn test_builder_chaining() { - let defaults = DownloadDefaults::default() - .with_base_url("https://custom.example.com") - .with_snapshot("https://snapshot1.com") - .with_snapshot("https://snapshot2.com") - .with_long_help("Custom help for snapshots"); - - assert_eq!(defaults.default_base_url, "https://custom.example.com"); - assert_eq!(defaults.available_snapshots.len(), 4); // 2 defaults + 2 added - assert_eq!(defaults.long_help, Some("Custom help for snapshots".to_string())); - } - - #[test] - fn test_compression_format_detection() { - 
assert!(matches!( - CompressionFormat::from_url("https://example.com/snapshot.tar.lz4"), - Ok(CompressionFormat::Lz4) - )); - assert!(matches!( - CompressionFormat::from_url("https://example.com/snapshot.tar.zst"), - Ok(CompressionFormat::Zstd) - )); - assert!(matches!( - CompressionFormat::from_url("file:///path/to/snapshot.tar.lz4"), - Ok(CompressionFormat::Lz4) - )); - assert!(matches!( - CompressionFormat::from_url("file:///path/to/snapshot.tar.zst"), - Ok(CompressionFormat::Zstd) - )); - assert!(CompressionFormat::from_url("https://example.com/snapshot.tar.gz").is_err()); - } -} diff --git a/crates/cli/commands/src/download/config_gen.rs b/crates/cli/commands/src/download/config_gen.rs new file mode 100644 index 0000000000..448edac0c8 --- /dev/null +++ b/crates/cli/commands/src/download/config_gen.rs @@ -0,0 +1,609 @@ +use crate::download::{ + manifest::{ComponentManifest, ComponentSelection, SnapshotComponentType, SnapshotManifest}, + SelectionPreset, +}; +use reth_chainspec::{EthereumHardfork, EthereumHardforks}; +use reth_config::config::{BlocksPerFileConfig, Config, PruneConfig, StaticFilesConfig}; +use reth_db::tables; +use reth_db_api::transaction::{DbTx, DbTxMut}; +use reth_node_core::args::DefaultPruningValues; +use reth_prune_types::{PruneCheckpoint, PruneMode, PruneSegment}; +use reth_stages_types::StageCheckpoint; +use std::{collections::BTreeMap, path::Path}; +use tracing::info; + +/// Minimum blocks to keep for receipts, matching `--minimal` prune settings. +const MINIMUM_RECEIPTS_DISTANCE: u64 = 64; + +/// Minimum blocks to keep for history/bodies, matching `--minimal` prune settings +/// (`MINIMUM_UNWIND_SAFE_DISTANCE`). +const MINIMUM_HISTORY_DISTANCE: u64 = 10064; + +/// Writes a [`Config`] as TOML to `/reth.toml`. +/// +/// If the file already exists, it is not overwritten. Returns `true` if the file was written. 
+pub fn write_config(config: &Config, data_dir: &Path) -> eyre::Result { + let config_path = data_dir.join("reth.toml"); + + if config_path.exists() { + info!(target: "reth::cli", + path = ?config_path, + "reth.toml already exists, skipping config generation" + ); + return Ok(false); + } + + let toml_str = toml::to_string_pretty(config)?; + reth_fs_util::write(&config_path, toml_str)?; + + info!(target: "reth::cli", + path = ?config_path, + "Generated reth.toml based on downloaded components" + ); + + Ok(true) +} + +/// Writes prune checkpoints to the provided write transaction. +pub(crate) fn write_prune_checkpoints_tx( + tx: &Tx, + config: &Config, + snapshot_block: u64, +) -> eyre::Result<()> +where + Tx: DbTx + DbTxMut, +{ + let segments = &config.prune.segments; + + // Collect (segment, mode) pairs for all configured prune segments + let checkpoints: Vec<(PruneSegment, PruneMode)> = [ + (PruneSegment::SenderRecovery, segments.sender_recovery), + (PruneSegment::TransactionLookup, segments.transaction_lookup), + (PruneSegment::Receipts, segments.receipts), + (PruneSegment::AccountHistory, segments.account_history), + (PruneSegment::StorageHistory, segments.storage_history), + (PruneSegment::Bodies, segments.bodies_history), + ] + .into_iter() + .filter_map(|(segment, mode)| mode.map(|m| (segment, m))) + .collect(); + + if checkpoints.is_empty() { + return Ok(()); + } + + // Look up the last tx number for the snapshot block from BlockBodyIndices + let tx_number = + tx.get::(snapshot_block)?.map(|indices| indices.last_tx_num()); + + for (segment, prune_mode) in &checkpoints { + let checkpoint = PruneCheckpoint { + block_number: Some(snapshot_block), + tx_number, + prune_mode: *prune_mode, + }; + + tx.put::(*segment, checkpoint)?; + + info!(target: "reth::cli", + segment = %segment, + block = snapshot_block, + tx = ?tx_number, + mode = ?prune_mode, + "Set prune checkpoint" + ); + } + + Ok(()) +} + +/// Stage IDs for index stages whose output is stored in RocksDB 
and is never +/// distributed in snapshots. +const INDEX_STAGE_IDS: [&str; 3] = + ["TransactionLookup", "IndexAccountHistory", "IndexStorageHistory"]; + +/// Prune segments that correspond to the index stages. +const INDEX_PRUNE_SEGMENTS: [PruneSegment; 3] = + [PruneSegment::TransactionLookup, PruneSegment::AccountHistory, PruneSegment::StorageHistory]; + +/// Resets stage and prune checkpoints for stages whose output is not included +/// in the snapshot inside an existing write transaction. +/// +/// A snapshot's mdbx comes from a fully synced node, so it has stage checkpoints +/// at the tip for `TransactionLookup`, `IndexAccountHistory`, and +/// `IndexStorageHistory`. Since we don't distribute the rocksdb indices those +/// stages produced, we must reset their checkpoints to block 0. Otherwise the +/// pipeline would see "already done" and skip rebuilding entirely. +/// +/// We intentionally do not reset `SenderRecovery`: sender static files are +/// distributed for archive downloads, and non-archive downloads rely on the +/// configured prune checkpoints for this segment. +pub(crate) fn reset_index_stage_checkpoints_tx(tx: &Tx) -> eyre::Result<()> +where + Tx: DbTx + DbTxMut, +{ + for stage_id in INDEX_STAGE_IDS { + tx.put::(stage_id.to_string(), StageCheckpoint::default())?; + + // Also clear any stage-specific progress data + tx.delete::(stage_id.to_string(), None)?; + + info!(target: "reth::cli", stage = stage_id, "Reset stage checkpoint to block 0"); + } + + // Clear corresponding prune checkpoints so the pruner doesn't inherit + // state from the source node + for segment in INDEX_PRUNE_SEGMENTS { + tx.delete::(segment, None)?; + } + + Ok(()) +} + +/// Generates a [`Config`] from per-component range selections. +/// +/// When all data components are selected as `All`, no pruning is configured (archive node). +/// Otherwise, `--minimal` style pruning is applied for missing/partial components. 
+pub(crate) fn config_for_selections( + selections: &BTreeMap, + manifest: &SnapshotManifest, + preset: Option, + chain_spec: Option<&impl EthereumHardforks>, +) -> Config { + let selection_for = |ty| selections.get(&ty).copied().unwrap_or(ComponentSelection::None); + + let tx_sel = selection_for(SnapshotComponentType::Transactions); + let senders_sel = selection_for(SnapshotComponentType::TransactionSenders); + let receipt_sel = selection_for(SnapshotComponentType::Receipts); + let account_cs_sel = selection_for(SnapshotComponentType::AccountChangesets); + let storage_cs_sel = selection_for(SnapshotComponentType::StorageChangesets); + + // Archive node — all data components present, no pruning + let is_archive = [tx_sel, senders_sel, receipt_sel, account_cs_sel, storage_cs_sel] + .iter() + .all(|s| *s == ComponentSelection::All); + + // Extract blocks_per_file from manifest for all component types + let blocks_per_file = |ty: SnapshotComponentType| -> Option { + match manifest.component(ty)? 
{ + ComponentManifest::Chunked(c) => Some(c.blocks_per_file), + ComponentManifest::Single(_) => None, + } + }; + let static_files = StaticFilesConfig { + blocks_per_file: BlocksPerFileConfig { + headers: blocks_per_file(SnapshotComponentType::Headers), + transactions: blocks_per_file(SnapshotComponentType::Transactions), + receipts: blocks_per_file(SnapshotComponentType::Receipts), + transaction_senders: blocks_per_file(SnapshotComponentType::TransactionSenders), + account_change_sets: blocks_per_file(SnapshotComponentType::AccountChangesets), + storage_change_sets: blocks_per_file(SnapshotComponentType::StorageChangesets), + }, + }; + + if is_archive || matches!(preset, Some(SelectionPreset::Archive)) { + return Config { static_files, ..Default::default() }; + } + + if matches!(preset, Some(SelectionPreset::Full)) { + let defaults = DefaultPruningValues::get_global(); + let mut segments = defaults.full_prune_modes.clone(); + + if defaults.full_bodies_history_use_pre_merge { + segments.bodies_history = chain_spec.and_then(|chain_spec| { + chain_spec + .ethereum_fork_activation(EthereumHardfork::Paris) + .block_number() + .map(PruneMode::Before) + }); + } + + return Config { + prune: PruneConfig { block_interval: PruneConfig::default().block_interval, segments }, + static_files, + ..Default::default() + }; + } + + let mut config = Config::default(); + let mut prune = PruneConfig::default(); + + if senders_sel != ComponentSelection::All { + prune.segments.sender_recovery = Some(PruneMode::Full); + } + prune.segments.transaction_lookup = Some(PruneMode::Full); + + if let Some(mode) = selection_to_prune_mode(tx_sel, Some(MINIMUM_HISTORY_DISTANCE)) { + prune.segments.bodies_history = Some(mode); + } + + if let Some(mode) = selection_to_prune_mode(receipt_sel, Some(MINIMUM_RECEIPTS_DISTANCE)) { + prune.segments.receipts = Some(mode); + } + + if let Some(mode) = selection_to_prune_mode(account_cs_sel, Some(MINIMUM_HISTORY_DISTANCE)) { + prune.segments.account_history = 
Some(mode); + } + + if let Some(mode) = selection_to_prune_mode(storage_cs_sel, Some(MINIMUM_HISTORY_DISTANCE)) { + prune.segments.storage_history = Some(mode); + } + + config.prune = prune; + config.static_files = static_files; + config +} + +/// Converts a [`ComponentSelection`] to an optional [`PruneMode`]. +/// +/// `min_distance` enforces the minimum blocks required for this segment. +/// When set, `None` and distances below the minimum are clamped to it +/// instead of producing `PruneMode::Full` which reth would reject. +fn selection_to_prune_mode( + sel: ComponentSelection, + min_distance: Option, +) -> Option { + match sel { + ComponentSelection::All => None, + ComponentSelection::Distance(d) => { + Some(PruneMode::Distance(min_distance.map_or(d, |min| d.max(min)))) + } + ComponentSelection::None => Some(min_distance.map_or(PruneMode::Full, PruneMode::Distance)), + } +} + +/// Human-readable prune config summary. +pub(crate) fn describe_prune_config(config: &Config) -> Vec { + let segments = &config.prune.segments; + + [ + ("sender_recovery", segments.sender_recovery), + ("transaction_lookup", segments.transaction_lookup), + ("bodies_history", segments.bodies_history), + ("receipts", segments.receipts), + ("account_history", segments.account_history), + ("storage_history", segments.storage_history), + ] + .into_iter() + .filter_map(|(name, mode)| mode.map(|m| format!("{name}={}", format_mode(&m)))) + .collect() +} + +fn format_mode(mode: &PruneMode) -> String { + match mode { + PruneMode::Full => "\"full\"".to_string(), + PruneMode::Distance(d) => format!("{{ distance = {d} }}"), + PruneMode::Before(b) => format!("{{ before = {b} }}"), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use reth_db::Database; + + /// Empty manifest for tests that only care about prune config. 
+ fn empty_manifest() -> SnapshotManifest { + SnapshotManifest { + block: 0, + chain_id: 1, + storage_version: 2, + timestamp: 0, + base_url: None, + components: BTreeMap::new(), + } + } + + #[test] + fn write_prune_checkpoints_sets_all_segments() { + let dir = tempfile::tempdir().unwrap(); + let db = reth_db::init_db(dir.path(), reth_db::mdbx::DatabaseArguments::default()).unwrap(); + + let mut selections = BTreeMap::new(); + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + let snapshot_block = 21_000_000; + + { + let tx = db.tx_mut().unwrap(); + write_prune_checkpoints_tx(&tx, &config, snapshot_block).unwrap(); + tx.commit().unwrap(); + } + + // Verify all expected segments have checkpoints + let tx = db.tx().unwrap(); + for segment in [ + PruneSegment::SenderRecovery, + PruneSegment::TransactionLookup, + PruneSegment::Receipts, + PruneSegment::AccountHistory, + PruneSegment::StorageHistory, + PruneSegment::Bodies, + ] { + let checkpoint = tx + .get::(segment) + .unwrap() + .unwrap_or_else(|| panic!("expected checkpoint for {segment}")); + assert_eq!(checkpoint.block_number, Some(snapshot_block)); + // No BlockBodyIndices in empty DB, so tx_number should be None + assert_eq!(checkpoint.tx_number, None); + } + } + + #[test] + fn write_prune_checkpoints_archive_no_checkpoints() { + let dir = tempfile::tempdir().unwrap(); + let db = reth_db::init_db(dir.path(), reth_db::mdbx::DatabaseArguments::default()).unwrap(); + + // Archive node — no pruning configured, so no checkpoints written + let mut selections = BTreeMap::new(); + for ty in SnapshotComponentType::ALL { + selections.insert(ty, ComponentSelection::All); + } + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); 
+ + { + let tx = db.tx_mut().unwrap(); + write_prune_checkpoints_tx(&tx, &config, 21_000_000).unwrap(); + tx.commit().unwrap(); + } + + let tx = db.tx().unwrap(); + for segment in [PruneSegment::SenderRecovery, PruneSegment::TransactionLookup] { + assert!( + tx.get::(segment).unwrap().is_none(), + "expected no checkpoint for {segment} on archive node" + ); + } + } + + #[test] + fn selections_all_no_pruning() { + let mut selections = BTreeMap::new(); + for ty in SnapshotComponentType::ALL { + selections.insert(ty, ComponentSelection::All); + } + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + // Archive node — nothing pruned + assert_eq!(config.prune.segments.transaction_lookup, None); + assert_eq!(config.prune.segments.sender_recovery, None); + assert_eq!(config.prune.segments.bodies_history, None); + assert_eq!(config.prune.segments.receipts, None); + assert_eq!(config.prune.segments.account_history, None); + assert_eq!(config.prune.segments.storage_history, None); + } + + #[test] + fn selections_none_clamps_to_minimum_distance() { + let mut selections = BTreeMap::new(); + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + assert_eq!(config.prune.segments.transaction_lookup, Some(PruneMode::Full)); + assert_eq!(config.prune.segments.sender_recovery, Some(PruneMode::Full)); + // All segments clamped to their minimum distances + assert_eq!( + config.prune.segments.bodies_history, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + assert_eq!( + config.prune.segments.receipts, + Some(PruneMode::Distance(MINIMUM_RECEIPTS_DISTANCE)) + ); + assert_eq!( + config.prune.segments.account_history, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + 
assert_eq!( + config.prune.segments.storage_history, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + } + + #[test] + fn selections_distance_maps_bodies_history() { + let mut selections = BTreeMap::new(); + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + selections + .insert(SnapshotComponentType::Transactions, ComponentSelection::Distance(10_064)); + selections.insert(SnapshotComponentType::Receipts, ComponentSelection::None); + selections + .insert(SnapshotComponentType::AccountChangesets, ComponentSelection::Distance(10_064)); + selections + .insert(SnapshotComponentType::StorageChangesets, ComponentSelection::Distance(10_064)); + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + + assert_eq!(config.prune.segments.transaction_lookup, Some(PruneMode::Full)); + assert_eq!(config.prune.segments.sender_recovery, Some(PruneMode::Full)); + // Bodies follows tx selection + assert_eq!(config.prune.segments.bodies_history, Some(PruneMode::Distance(10_064))); + assert_eq!( + config.prune.segments.receipts, + Some(PruneMode::Distance(MINIMUM_RECEIPTS_DISTANCE)) + ); + assert_eq!(config.prune.segments.account_history, Some(PruneMode::Distance(10_064))); + assert_eq!(config.prune.segments.storage_history, Some(PruneMode::Distance(10_064))); + } + + #[test] + fn full_preset_matches_default_full_prune_config() { + let mut selections = BTreeMap::new(); + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + selections + .insert(SnapshotComponentType::Transactions, ComponentSelection::Distance(500_000)); + selections.insert(SnapshotComponentType::Receipts, ComponentSelection::Distance(10_064)); + + let chain_spec = reth_chainspec::MAINNET.clone(); + let config = config_for_selections( + 
&selections, + &empty_manifest(), + Some(SelectionPreset::Full), + Some(chain_spec.as_ref()), + ); + + assert_eq!(config.prune.segments.sender_recovery, Some(PruneMode::Full)); + assert_eq!(config.prune.segments.transaction_lookup, None); + assert_eq!( + config.prune.segments.receipts, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + assert_eq!( + config.prune.segments.account_history, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + assert_eq!( + config.prune.segments.storage_history, + Some(PruneMode::Distance(MINIMUM_HISTORY_DISTANCE)) + ); + + let paris_block = chain_spec + .ethereum_fork_activation(EthereumHardfork::Paris) + .block_number() + .expect("mainnet Paris block should be known"); + assert_eq!(config.prune.segments.bodies_history, Some(PruneMode::Before(paris_block))); + } + + #[test] + fn describe_selections_all_no_pruning() { + let mut selections = BTreeMap::new(); + for ty in SnapshotComponentType::ALL { + selections.insert(ty, ComponentSelection::All); + } + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + let desc = describe_prune_config(&config); + // Archive node — no prune segments described + assert!(desc.is_empty()); + } + + #[test] + fn describe_selections_with_distances() { + let mut selections = BTreeMap::new(); + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + selections + .insert(SnapshotComponentType::Transactions, ComponentSelection::Distance(10_064)); + selections.insert(SnapshotComponentType::Receipts, ComponentSelection::None); + let config = config_for_selections( + &selections, + &empty_manifest(), + None, + None::<&reth_chainspec::ChainSpec>, + ); + let desc = describe_prune_config(&config); + assert!(desc.contains(&"sender_recovery=\"full\"".to_string())); + // Bodies follows tx selection + 
assert!(desc.contains(&"bodies_history={ distance = 10064 }".to_string())); + assert!(desc.contains(&"receipts={ distance = 64 }".to_string())); + } + + #[test] + fn reset_index_stage_checkpoints_clears_only_rocksdb_index_stages() { + let dir = tempfile::tempdir().unwrap(); + let db = reth_db::init_db(dir.path(), reth_db::mdbx::DatabaseArguments::default()).unwrap(); + + // Simulate a fully synced node: set stage checkpoints at tip + let tip_checkpoint = StageCheckpoint::new(24_500_000); + { + let tx = db.tx_mut().unwrap(); + for stage_id in INDEX_STAGE_IDS { + tx.put::(stage_id.to_string(), tip_checkpoint).unwrap(); + } + for segment in INDEX_PRUNE_SEGMENTS { + tx.put::( + segment, + PruneCheckpoint { + block_number: Some(24_500_000), + tx_number: None, + prune_mode: PruneMode::Full, + }, + ) + .unwrap(); + } + + // Sender recovery checkpoints should be preserved by reset. + tx.put::("SenderRecovery".to_string(), tip_checkpoint) + .unwrap(); + tx.put::( + PruneSegment::SenderRecovery, + PruneCheckpoint { + block_number: Some(24_500_000), + tx_number: None, + prune_mode: PruneMode::Full, + }, + ) + .unwrap(); + tx.commit().unwrap(); + } + + // Reset + { + let tx = db.tx_mut().unwrap(); + reset_index_stage_checkpoints_tx(&tx).unwrap(); + tx.commit().unwrap(); + } + + // Verify stage checkpoints are at block 0 + let tx = db.tx().unwrap(); + for stage_id in INDEX_STAGE_IDS { + let checkpoint = tx + .get::(stage_id.to_string()) + .unwrap() + .expect("checkpoint should exist"); + assert_eq!(checkpoint.block_number, 0, "stage {stage_id} should be reset to block 0"); + } + + // Verify prune checkpoints are deleted + for segment in INDEX_PRUNE_SEGMENTS { + assert!( + tx.get::(segment).unwrap().is_none(), + "prune checkpoint for {segment} should be deleted" + ); + } + + // Verify sender checkpoints are left untouched. 
+ let sender_stage_checkpoint = tx + .get::("SenderRecovery".to_string()) + .unwrap() + .expect("sender checkpoint should exist"); + assert_eq!(sender_stage_checkpoint.block_number, tip_checkpoint.block_number); + + let sender_prune_checkpoint = tx + .get::(PruneSegment::SenderRecovery) + .unwrap() + .expect("sender prune checkpoint should exist"); + assert_eq!(sender_prune_checkpoint.block_number, Some(tip_checkpoint.block_number)); + } +} diff --git a/crates/cli/commands/src/download/manifest.rs b/crates/cli/commands/src/download/manifest.rs new file mode 100644 index 0000000000..19245ed689 --- /dev/null +++ b/crates/cli/commands/src/download/manifest.rs @@ -0,0 +1,1091 @@ +use blake3::Hasher; +use eyre::Result; +use rayon::prelude::*; +use reqwest::Client; +use serde::{Deserialize, Serialize}; +use std::{ + collections::BTreeMap, + io::Read, + path::{Path, PathBuf}, +}; +use tracing::info; + +/// A snapshot manifest describes available components for a snapshot at a given block height. +/// +/// Each component is either a single archive (state) or a set of chunked archives (static file +/// segments like transactions, receipts, etc). Chunked components use `blocks_per_file` to +/// define the block range per archive, matching reth's static file segment boundaries. +/// +/// Archive naming convention for chunked components: +/// `{component}-{start_block}-{end_block}.tar.zst` +/// +/// For example with `blocks_per_file: 500000` and `total_blocks: 1500000`: +/// `transactions-0-499999.tar.zst` +/// `transactions-500000-999999.tar.zst` +/// `transactions-1000000-1499999.tar.zst` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SnapshotManifest { + /// Block number this snapshot was taken at. + pub block: u64, + /// Chain ID. + pub chain_id: u64, + /// Storage version (1 = legacy, 2 = current). + pub storage_version: u64, + /// Timestamp when the snapshot was created (unix seconds). + pub timestamp: u64, + /// Base URL for archive downloads. 
Component archive URLs are relative to this. + /// + /// When omitted, downloaders should derive the base URL from the manifest URL. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub base_url: Option, + /// Available snapshot components. + pub components: BTreeMap, +} + +/// Manifest entry for a single snapshot component. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum ComponentManifest { + /// A single archive file (used for state). + Single(SingleArchive), + /// A set of chunked archives split by block range (used for static file segments). + Chunked(ChunkedArchive), +} + +/// A single, non-chunked archive. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SingleArchive { + /// Archive file name (relative to base_url). + pub file: String, + /// Compressed archive size in bytes. + pub size: u64, + /// Optional BLAKE3 checksum of the compressed archive. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub blake3: Option, + /// Expected extracted plain files for this archive. + /// + /// This is the authoritative integrity source for the modular download path. + #[serde(default)] + pub output_files: Vec, +} + +/// A chunked archive set where each chunk covers a fixed block range. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChunkedArchive { + /// Number of blocks per archive file. Matches reth's `blocks_per_file` config. + pub blocks_per_file: u64, + /// Total number of blocks covered by this component. + pub total_blocks: u64, + /// Compressed size of each chunk in bytes, ordered from first to last. + /// Computed during manifest generation. Older manifests may omit this. + #[serde(default)] + pub chunk_sizes: Vec, + /// Expected extracted plain files per chunk, ordered from first to last. + /// + /// This is the authoritative integrity source for the modular download path. 
+ #[serde(default)] + pub chunk_output_files: Vec>, +} + +/// Expected metadata for one extracted plain file. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct OutputFileChecksum { + /// Relative path under the target datadir where this file is extracted. + pub path: String, + /// Plain file size in bytes. + pub size: u64, + /// BLAKE3 checksum of the plain file contents. + pub blake3: String, +} + +/// A single archive with concrete URL and optional integrity metadata. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ArchiveDescriptor { + pub url: String, + pub file_name: String, + pub size: u64, + pub blake3: Option, + pub output_files: Vec, +} + +/// How much of a component to download. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ComponentSelection { + /// Download all chunks (full archive). + All, + /// Download only the most recent chunks covering at least `distance` blocks. + /// Maps to `PruneMode::Distance(distance)` in the generated config. + Distance(u64), + /// Don't download this component at all. + /// Maps to `PruneMode::Full` for tx-based segments, or a minimal distance for others. + None, +} + +impl std::fmt::Display for ComponentSelection { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::All => write!(f, "All"), + Self::Distance(d) => write!(f, "Last {d} blocks"), + Self::None => write!(f, "None"), + } + } +} + +/// The types of snapshot components that can be downloaded. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum SnapshotComponentType { + /// State database (mdbx). Always required. Single archive. + State, + /// Block headers static files. Chunked. + Headers, + /// Transaction static files. Chunked. + Transactions, + /// Transaction sender static files. Chunked. Only downloaded for archive nodes. + TransactionSenders, + /// Receipt static files. Chunked. + Receipts, + /// Account changeset static files. Chunked. 
+ AccountChangesets, + /// Storage changeset static files. Chunked. + StorageChangesets, + /// RocksDB index files. Single archive. Optional and archive-only. + RocksdbIndices, +} + +impl SnapshotComponentType { + /// All component types in display order. + pub const ALL: [Self; 8] = [ + Self::State, + Self::Headers, + Self::Transactions, + Self::TransactionSenders, + Self::Receipts, + Self::AccountChangesets, + Self::StorageChangesets, + Self::RocksdbIndices, + ]; + + /// The string key used in the manifest JSON. + pub const fn key(&self) -> &'static str { + match self { + Self::State => "state", + Self::Headers => "headers", + Self::Transactions => "transactions", + Self::TransactionSenders => "transaction_senders", + Self::Receipts => "receipts", + Self::AccountChangesets => "account_changesets", + Self::StorageChangesets => "storage_changesets", + Self::RocksdbIndices => "rocksdb_indices", + } + } + + /// Human-readable display name. + pub const fn display_name(&self) -> &'static str { + match self { + Self::State => "State (mdbx)", + Self::Headers => "Headers", + Self::Transactions => "Transactions", + Self::TransactionSenders => "Transaction Senders", + Self::Receipts => "Receipts", + Self::AccountChangesets => "Account Changesets", + Self::StorageChangesets => "Storage Changesets", + Self::RocksdbIndices => "RocksDB Indices", + } + } + + /// Whether this component is always required for a functional node. + /// + /// State and headers are always needed — a node cannot operate without block headers. + pub const fn is_required(&self) -> bool { + matches!(self, Self::State | Self::Headers) + } + + /// Returns the default selection for this component in the minimal download preset. 
+ /// + /// Matches the `--minimal` prune configuration: + /// - State/Headers: always All (required) + /// - Transactions/Changesets: Distance(10_064) (`MINIMUM_UNWIND_SAFE_DISTANCE`) + /// - Receipts: Distance(64) (`MINIMUM_DISTANCE`) + /// - TransactionSenders: None (only downloaded for archive nodes) + /// - RocksdbIndices: None (only downloaded for archive nodes) + /// + /// `tx_lookup` and `sender_recovery` are always pruned full regardless. + pub const fn minimal_selection(&self) -> ComponentSelection { + match self { + Self::State | Self::Headers => ComponentSelection::All, + Self::Transactions | Self::AccountChangesets | Self::StorageChangesets => { + ComponentSelection::Distance(10_064) + } + Self::Receipts => ComponentSelection::Distance(64), + Self::TransactionSenders => ComponentSelection::None, + Self::RocksdbIndices => ComponentSelection::None, + } + } + + /// Whether this component type uses chunked archives. + pub const fn is_chunked(&self) -> bool { + !matches!(self, Self::State | Self::RocksdbIndices) + } +} + +impl SnapshotManifest { + fn base_url_or_empty(&self) -> &str { + self.base_url.as_deref().unwrap_or("") + } + + /// Look up a component by type. + pub fn component(&self, ty: SnapshotComponentType) -> Option<&ComponentManifest> { + self.components.get(ty.key()) + } + + /// Returns the total download size for the given set of component types. + pub fn total_size(&self, types: &[SnapshotComponentType]) -> u64 { + types.iter().filter_map(|ty| self.component(*ty).map(|c| c.total_size())).sum() + } + + /// Returns all archive URLs for a given component type. 
+ pub fn archive_urls(&self, ty: SnapshotComponentType) -> Vec { + let Some(component) = self.component(ty) else { + return vec![]; + }; + + match component { + ComponentManifest::Single(single) => { + vec![format!("{}/{}", self.base_url_or_empty(), single.file)] + } + ComponentManifest::Chunked(chunked) => { + let key = ty.key(); + let num_chunks = chunked.num_chunks(); + (0..num_chunks) + .map(|i| { + let start = i * chunked.blocks_per_file; + let end = (i + 1) * chunked.blocks_per_file - 1; + format!("{}/{key}-{start}-{end}.tar.zst", self.base_url_or_empty()) + }) + .collect() + } + } + } + + /// Returns archive URLs for a component, limited to chunks covering at least `distance` + /// blocks from the tip. Returns all URLs if distance is `None` (All mode). + pub fn archive_urls_for_distance( + &self, + ty: SnapshotComponentType, + distance: Option, + ) -> Vec { + let Some(component) = self.component(ty) else { + return vec![]; + }; + + match component { + ComponentManifest::Single(single) => { + vec![format!("{}/{}", self.base_url_or_empty(), single.file)] + } + ComponentManifest::Chunked(chunked) => { + let key = ty.key(); + let num_chunks = chunked.num_chunks(); + + // Calculate which chunks to include + let start_chunk = match distance { + Some(dist) => { + // We need chunks covering the last `dist` blocks + let needed_blocks = dist.min(chunked.total_blocks); + let needed_chunks = needed_blocks.div_ceil(chunked.blocks_per_file); + num_chunks.saturating_sub(needed_chunks) + } + None => 0, // All chunks + }; + + (start_chunk..num_chunks) + .map(|i| { + let start = i * chunked.blocks_per_file; + let end = (i + 1) * chunked.blocks_per_file - 1; + format!("{}/{key}-{start}-{end}.tar.zst", self.base_url_or_empty()) + }) + .collect() + } + } + } + + /// Returns concrete archive descriptors for a component, optionally limited to distance. 
+ pub fn archive_descriptors_for_distance( + &self, + ty: SnapshotComponentType, + distance: Option, + ) -> Vec { + let Some(component) = self.component(ty) else { + return vec![]; + }; + + match component { + ComponentManifest::Single(single) => { + vec![ArchiveDescriptor { + url: format!("{}/{}", self.base_url_or_empty(), single.file), + file_name: single.file.clone(), + size: single.size, + blake3: single.blake3.clone(), + output_files: single.output_files.clone(), + }] + } + ComponentManifest::Chunked(chunked) => { + let key = ty.key(); + let num_chunks = chunked.num_chunks(); + + let start_chunk = match distance { + Some(dist) => { + let needed_blocks = dist.min(chunked.total_blocks); + let needed_chunks = needed_blocks.div_ceil(chunked.blocks_per_file); + num_chunks.saturating_sub(needed_chunks) + } + None => 0, + }; + + (start_chunk..num_chunks) + .map(|i| { + let start = i * chunked.blocks_per_file; + let end = (i + 1) * chunked.blocks_per_file - 1; + let file_name = format!("{key}-{start}-{end}.tar.zst"); + let size = chunked.chunk_sizes.get(i as usize).copied().unwrap_or_default(); + let output_files = + chunked.chunk_output_files.get(i as usize).cloned().unwrap_or_default(); + + ArchiveDescriptor { + url: format!("{}/{}", self.base_url_or_empty(), file_name), + file_name, + size, + blake3: None, + output_files, + } + }) + .collect() + } + } + } + + /// Returns the exact download size for a component given a distance selection. + /// + /// For single archives, returns the full size. For chunked archives, sums the + /// sizes of the selected tail chunks from [`ChunkedArchive::chunk_sizes`]. 
+ pub fn size_for_distance(&self, ty: SnapshotComponentType, distance: Option) -> u64 { + let Some(component) = self.component(ty) else { + return 0; + }; + match component { + ComponentManifest::Single(s) => s.size, + ComponentManifest::Chunked(chunked) => { + if chunked.chunk_sizes.is_empty() { + return 0; + } + let num_chunks = chunked.chunk_sizes.len() as u64; + let start_chunk = match distance { + Some(dist) => { + let needed = dist.min(chunked.total_blocks); + let needed_chunks = needed.div_ceil(chunked.blocks_per_file); + num_chunks.saturating_sub(needed_chunks) + } + None => 0, + }; + chunked.chunk_sizes[start_chunk as usize..].iter().sum() + } + } + } + + /// Returns the number of chunks that would be downloaded for a given distance. + pub fn chunks_for_distance(&self, ty: SnapshotComponentType, distance: Option) -> u64 { + let Some(ComponentManifest::Chunked(chunked)) = self.component(ty) else { + return if self.component(ty).is_some() { 1 } else { 0 }; + }; + match distance { + Some(dist) => { + let needed = dist.min(chunked.total_blocks); + needed.div_ceil(chunked.blocks_per_file) + } + None => chunked.num_chunks(), + } + } +} + +impl ComponentManifest { + /// Returns the total download size for this component. + pub fn total_size(&self) -> u64 { + match self { + Self::Single(s) => s.size, + Self::Chunked(c) => c.chunk_sizes.iter().sum(), + } + } +} + +impl ChunkedArchive { + /// Returns the number of chunks. + pub fn num_chunks(&self) -> u64 { + self.total_blocks.div_ceil(self.blocks_per_file) + } +} + +/// Fetch a snapshot manifest from a URL. +pub async fn fetch_manifest(manifest_url: &str) -> Result { + let client = Client::new(); + let manifest: SnapshotManifest = + client.get(manifest_url).send().await?.error_for_status()?.json().await?; + Ok(manifest) +} + +/// Package chunk archives from a source datadir and generate a manifest. 
+pub fn generate_manifest( + source_datadir: &Path, + output_dir: &Path, + base_url: Option<&str>, + block: u64, + chain_id: u64, + blocks_per_file: u64, +) -> Result { + std::fs::create_dir_all(output_dir)?; + + let mut components = BTreeMap::new(); + + // Package chunked static-file components. + for ty in &[ + SnapshotComponentType::Headers, + SnapshotComponentType::Transactions, + SnapshotComponentType::TransactionSenders, + SnapshotComponentType::Receipts, + SnapshotComponentType::AccountChangesets, + SnapshotComponentType::StorageChangesets, + ] { + let key = ty.key(); + let num_chunks = block.div_ceil(blocks_per_file); + let mut planned_chunks = Vec::with_capacity(num_chunks as usize); + let mut found_any = false; + + for i in 0..num_chunks { + let start = i * blocks_per_file; + let end = (i + 1) * blocks_per_file - 1; + let source_files = source_files_for_chunk(source_datadir, *ty, start, end)?; + + if source_files.is_empty() { + if found_any { + eyre::bail!("Missing source files for {} chunk {}-{}", key, start, end); + } + continue; + } + + found_any = true; + planned_chunks.push(PlannedChunk { + chunk_idx: i, + archive_path: output_dir.join(chunk_filename(key, start, end)), + source_files, + }); + } + + if found_any { + let mut packaged_chunks = planned_chunks + .into_par_iter() + .map(|planned| -> Result { + let output_files = + write_chunk_archive(&planned.archive_path, &planned.source_files)?; + let size = std::fs::metadata(&planned.archive_path)?.len(); + Ok(PackagedChunk { chunk_idx: planned.chunk_idx, size, output_files }) + }) + .collect::>() + .into_iter() + .collect::>>()?; + + packaged_chunks.sort_unstable_by_key(|chunk| chunk.chunk_idx); + let chunk_sizes = packaged_chunks.iter().map(|chunk| chunk.size).collect::>(); + let chunk_output_files = + packaged_chunks.into_iter().map(|chunk| chunk.output_files).collect::>(); + let total_size: u64 = chunk_sizes.iter().sum(); + info!(target: "reth::cli", + component = ty.display_name(), + chunks = 
chunk_sizes.len(), + total_blocks = block, + size = %super::DownloadProgress::format_size(total_size), + "Found chunked component" + ); + components.insert( + key.to_string(), + ComponentManifest::Chunked(ChunkedArchive { + blocks_per_file, + total_blocks: block, + chunk_sizes, + chunk_output_files, + }), + ); + } + } + + let (state_size, state_output_files) = package_single_component( + output_dir, + "state.tar.zst", + &state_source_files(source_datadir)?, + )?; + components.insert( + SnapshotComponentType::State.key().to_string(), + ComponentManifest::Single(SingleArchive { + file: "state.tar.zst".to_string(), + size: state_size, + blake3: None, + output_files: state_output_files, + }), + ); + + let rocksdb_files = rocksdb_source_files(source_datadir)?; + if !rocksdb_files.is_empty() { + let (rocksdb_size, rocksdb_output_files) = + package_single_component(output_dir, "rocksdb_indices.tar.zst", &rocksdb_files)?; + components.insert( + SnapshotComponentType::RocksdbIndices.key().to_string(), + ComponentManifest::Single(SingleArchive { + file: "rocksdb_indices.tar.zst".to_string(), + size: rocksdb_size, + blake3: None, + output_files: rocksdb_output_files, + }), + ); + } + + let timestamp = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)?.as_secs(); + + Ok(SnapshotManifest { + block, + chain_id, + storage_version: 2, + timestamp, + base_url: base_url.map(str::to_owned), + components, + }) +} + +/// Resolves an archive file path from a component key and naming convention. 
pub fn chunk_filename(component_key: &str, start: u64, end: u64) -> String {
    format!("{component_key}-{start}-{end}.tar.zst")
}

/// A chunk archive that has been planned but not yet written.
#[derive(Debug)]
struct PlannedChunk {
    chunk_idx: u64,
    archive_path: PathBuf,
    source_files: Vec<PathBuf>,
}

/// Size and per-file checksums of a chunk archive after packaging.
#[derive(Debug)]
struct PackagedChunk {
    chunk_idx: u64,
    size: u64,
    output_files: Vec<OutputFileChecksum>,
}

/// Maps a source file on disk to its relative path inside the archive.
#[derive(Debug)]
struct PlannedFile {
    source_path: PathBuf,
    relative_path: PathBuf,
}

/// Collects the static-file segment files belonging to one chunk's block range.
///
/// Returns an empty list for components not backed by static files (state,
/// rocksdb indices). Files are matched by the reth static-file naming
/// convention `static_file_{segment}_{start}_{end}[.ext]` and returned sorted.
fn source_files_for_chunk(
    source_datadir: &Path,
    component: SnapshotComponentType,
    start: u64,
    end: u64,
) -> Result<Vec<PathBuf>> {
    let Some(segment_name) = static_segment_name(component) else {
        return Ok(Vec::new());
    };

    // Prefer the `static_files` subdirectory; fall back to the datadir itself
    // so pointing straight at a directory of segment files also works.
    let static_files_dir = source_datadir.join("static_files");
    let static_files_dir =
        if static_files_dir.exists() { static_files_dir } else { source_datadir.to_path_buf() };
    let prefix = format!("static_file_{segment_name}_{start}_{end}");

    let mut files = Vec::new();
    for entry in std::fs::read_dir(&static_files_dir)? {
        let entry = entry?;
        if !entry.file_type()?.is_file() {
            continue;
        }
        // A bare `starts_with` test could also match a *longer* block range
        // whose decimal digits merely extend this one (e.g. `…_0_499`
        // matching `…_0_4999…`), so require the match to end at the file name
        // boundary or at an extension separator (`.off`, `.conf`, …).
        let name = entry.file_name();
        let name = name.to_string_lossy();
        if let Some(rest) = name.strip_prefix(&prefix) {
            if rest.is_empty() || rest.starts_with('.') {
                files.push(entry.path());
            }
        }
    }

    files.sort_unstable();
    Ok(files)
}

/// Maps a chunked component to its reth static-file segment name, or `None`
/// for components that are not stored as static files.
fn static_segment_name(component: SnapshotComponentType) -> Option<&'static str> {
    match component {
        SnapshotComponentType::Headers => Some("headers"),
        SnapshotComponentType::Transactions => Some("transactions"),
        SnapshotComponentType::TransactionSenders => Some("transaction-senders"),
        SnapshotComponentType::Receipts => Some("receipts"),
        SnapshotComponentType::AccountChangesets => Some("account-change-sets"),
        SnapshotComponentType::StorageChangesets => Some("storage-change-sets"),
        SnapshotComponentType::State | SnapshotComponentType::RocksdbIndices => None,
    }
}

fn state_source_files(source_datadir: &Path) -> Result<Vec<PlannedFile>> {
    let db_dir = source_datadir.join("db");
    if db_dir.exists() {
        return collect_files_recursive(&db_dir, Path::new("db"));
    }

if looks_like_db_dir(source_datadir)? { + return collect_files_recursive(source_datadir, Path::new("db")); + } + + eyre::bail!("Could not find source state DB directory under {}", source_datadir.display()) +} + +fn rocksdb_source_files(source_datadir: &Path) -> Result> { + let rocksdb_dir = source_datadir.join("rocksdb"); + if !rocksdb_dir.exists() { + return Ok(Vec::new()); + } + + collect_files_recursive(&rocksdb_dir, Path::new("rocksdb")) +} + +fn looks_like_db_dir(path: &Path) -> Result { + let entries = match std::fs::read_dir(path) { + Ok(entries) => entries, + Err(_) => return Ok(false), + }; + + for entry in entries { + let entry = entry?; + if !entry.file_type()?.is_file() { + continue; + } + let name = entry.file_name(); + let name = name.to_string_lossy(); + if name == "mdbx.dat" || name == "lock.mdb" || name == "data.mdb" { + return Ok(true); + } + } + + Ok(false) +} + +fn collect_files_recursive(root: &Path, output_prefix: &Path) -> Result> { + let mut files = Vec::new(); + collect_files_recursive_inner(root, root, output_prefix, &mut files)?; + files.sort_unstable_by(|a, b| a.relative_path.cmp(&b.relative_path)); + Ok(files) +} + +fn collect_files_recursive_inner( + root: &Path, + dir: &Path, + output_prefix: &Path, + files: &mut Vec, +) -> Result<()> { + for entry in std::fs::read_dir(dir)? 
{ + let entry = entry?; + let path = entry.path(); + let file_type = entry.file_type()?; + if file_type.is_dir() { + collect_files_recursive_inner(root, &path, output_prefix, files)?; + continue; + } + if !file_type.is_file() { + continue; + } + + let relative = path.strip_prefix(root)?.to_path_buf(); + files.push(PlannedFile { source_path: path, relative_path: output_prefix.join(relative) }); + } + + Ok(()) +} + +fn package_single_component( + output_dir: &Path, + archive_file_name: &str, + files: &[PlannedFile], +) -> Result<(u64, Vec)> { + if files.is_empty() { + eyre::bail!("Cannot package empty single archive: {}", archive_file_name); + } + + let archive_path = output_dir.join(archive_file_name); + let output_files = write_archive_from_planned_files(&archive_path, files)?; + let size = std::fs::metadata(&archive_path)?.len(); + Ok((size, output_files)) +} + +fn write_chunk_archive(path: &Path, source_files: &[PathBuf]) -> Result> { + let planned_files = source_files + .iter() + .map(|source_path| { + let file_name = source_path.file_name().ok_or_else(|| { + eyre::eyre!("Invalid source file path: {}", source_path.display()) + })?; + Ok::<_, eyre::Error>(PlannedFile { + source_path: source_path.clone(), + relative_path: PathBuf::from("static_files").join(file_name), + }) + }) + .collect::>>()?; + + write_archive_from_planned_files(path, &planned_files) +} + +fn write_archive_from_planned_files( + path: &Path, + files: &[PlannedFile], +) -> Result> { + let file = std::fs::File::create(path)?; + let mut encoder = zstd::Encoder::new(file, 0)?; + // Emit standard zstd frames with checksums for compatibility with external + // tools such as `pzstd -d`. 
+ encoder.include_checksum(true)?; + let mut builder = tar::Builder::new(encoder); + + let mut output_files = Vec::with_capacity(files.len()); + for planned in files { + let mut header = tar::Header::new_gnu(); + header.set_size(std::fs::metadata(&planned.source_path)?.len()); + header.set_mode(0o644); + header.set_cksum(); + + let source_file = std::fs::File::open(&planned.source_path)?; + let mut reader = HashingReader::new(source_file); + builder.append_data(&mut header, &planned.relative_path, &mut reader)?; + + output_files.push(OutputFileChecksum { + path: planned.relative_path.to_string_lossy().to_string(), + size: reader.bytes_read, + blake3: reader.finalize(), + }); + } + + builder.finish()?; + let encoder = builder.into_inner()?; + encoder.finish()?; + + Ok(output_files) +} + +struct HashingReader { + inner: R, + hasher: Hasher, + bytes_read: u64, +} + +impl HashingReader { + fn new(inner: R) -> Self { + Self { inner, hasher: Hasher::new(), bytes_read: 0 } + } + + fn finalize(self) -> String { + self.hasher.finalize().to_hex().to_string() + } +} + +impl Read for HashingReader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + if n > 0 { + self.bytes_read += n as u64; + self.hasher.update(&buf[..n]); + } + Ok(n) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + fn test_manifest() -> SnapshotManifest { + let mut components = BTreeMap::new(); + components.insert( + "state".to_string(), + ComponentManifest::Single(SingleArchive { + file: "state.tar.zst".to_string(), + size: 100, + blake3: None, + output_files: vec![], + }), + ); + components.insert( + "transactions".to_string(), + ComponentManifest::Chunked(ChunkedArchive { + blocks_per_file: 500_000, + total_blocks: 1_500_000, + chunk_sizes: vec![80_000, 100_000, 120_000], + chunk_output_files: vec![vec![], vec![], vec![]], + }), + ); + components.insert( + "headers".to_string(), + ComponentManifest::Chunked(ChunkedArchive { + 
blocks_per_file: 500_000, + total_blocks: 1_500_000, + chunk_sizes: vec![40_000, 50_000, 60_000], + chunk_output_files: vec![vec![], vec![], vec![]], + }), + ); + SnapshotManifest { + block: 1_500_000, + chain_id: 1, + storage_version: 2, + timestamp: 0, + base_url: Some("https://example.com".to_string()), + components, + } + } + + #[test] + fn archive_urls_for_distance_all() { + let m = test_manifest(); + let urls = m.archive_urls_for_distance(SnapshotComponentType::Transactions, None); + assert_eq!(urls.len(), 3); + assert_eq!(urls[0], "https://example.com/transactions-0-499999.tar.zst"); + assert_eq!(urls[2], "https://example.com/transactions-1000000-1499999.tar.zst"); + } + + #[test] + fn archive_urls_for_distance_partial() { + let m = test_manifest(); + // 600k blocks → needs 2 chunks (each 500k) + let urls = m.archive_urls_for_distance(SnapshotComponentType::Transactions, Some(600_000)); + assert_eq!(urls.len(), 2); + assert_eq!(urls[0], "https://example.com/transactions-500000-999999.tar.zst"); + assert_eq!(urls[1], "https://example.com/transactions-1000000-1499999.tar.zst"); + } + + #[test] + fn archive_urls_for_distance_single_component() { + let m = test_manifest(); + // Single archives always return one URL regardless of distance + let urls = m.archive_urls_for_distance(SnapshotComponentType::State, Some(100)); + assert_eq!(urls.len(), 1); + assert_eq!(urls[0], "https://example.com/state.tar.zst"); + } + + #[test] + fn archive_urls_for_distance_rocksdb_indices_single_component() { + let mut components = BTreeMap::new(); + components.insert( + "rocksdb_indices".to_string(), + ComponentManifest::Single(SingleArchive { + file: "rocksdb_indices.tar.zst".to_string(), + size: 777, + blake3: None, + output_files: vec![], + }), + ); + let m = SnapshotManifest { + block: 1, + chain_id: 1, + storage_version: 2, + timestamp: 0, + base_url: Some("https://example.com".to_string()), + components, + }; + + let urls = 
m.archive_urls_for_distance(SnapshotComponentType::RocksdbIndices, Some(10)); + assert_eq!(urls.len(), 1); + assert_eq!(urls[0], "https://example.com/rocksdb_indices.tar.zst"); + assert_eq!(m.size_for_distance(SnapshotComponentType::RocksdbIndices, Some(10)), 777); + } + + #[test] + fn archive_urls_for_distance_missing_component() { + let m = test_manifest(); + let urls = m.archive_urls_for_distance(SnapshotComponentType::Receipts, None); + assert!(urls.is_empty()); + } + + #[test] + fn chunks_for_distance_all() { + let m = test_manifest(); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::Transactions, None), 3); + } + + #[test] + fn chunks_for_distance_partial() { + let m = test_manifest(); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::Transactions, Some(600_000)), 2); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::Transactions, Some(100_000)), 1); + } + + #[test] + fn chunks_for_distance_single() { + let m = test_manifest(); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::State, None), 1); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::State, Some(100)), 1); + } + + #[test] + fn chunks_for_distance_missing() { + let m = test_manifest(); + assert_eq!(m.chunks_for_distance(SnapshotComponentType::Receipts, None), 0); + } + + #[test] + fn component_selection_display() { + assert_eq!(ComponentSelection::All.to_string(), "All"); + assert_eq!(ComponentSelection::Distance(10_064).to_string(), "Last 10064 blocks"); + assert_eq!(ComponentSelection::None.to_string(), "None"); + } + + #[test] + fn archive_urls_aligned_to_blocks_per_file() { + // When total_blocks is not aligned to blocks_per_file, chunk boundaries + // must still align to blocks_per_file (not total_blocks). 
+ let mut components = BTreeMap::new(); + components.insert( + "storage_changesets".to_string(), + ComponentManifest::Chunked(ChunkedArchive { + blocks_per_file: 500_000, + total_blocks: 24_396_822, + chunk_sizes: vec![100; 49], // 49 chunks + chunk_output_files: vec![vec![]; 49], + }), + ); + let m = SnapshotManifest { + block: 24_396_822, + chain_id: 1, + storage_version: 2, + timestamp: 0, + base_url: Some("https://example.com".to_string()), + components, + }; + let urls = m.archive_urls(SnapshotComponentType::StorageChangesets); + assert_eq!(urls.len(), 49); + // First chunk: 0-499999 (not 0-396821 or similar) + assert_eq!(urls[0], "https://example.com/storage_changesets-0-499999.tar.zst"); + // Last chunk: 24000000-24499999 (not 24000000-24396821) + assert_eq!(urls[48], "https://example.com/storage_changesets-24000000-24499999.tar.zst"); + } + + #[test] + fn size_for_distance_sums_tail_chunks() { + let m = test_manifest(); + // Transactions has chunk_sizes [80_000, 100_000, 120_000] + // All: sum of all 3 + assert_eq!(m.size_for_distance(SnapshotComponentType::Transactions, None), 300_000); + // Last 500K blocks = 1 chunk = last chunk only + assert_eq!( + m.size_for_distance(SnapshotComponentType::Transactions, Some(500_000)), + 120_000 + ); + // Last 600K blocks = 2 chunks = last two + assert_eq!( + m.size_for_distance(SnapshotComponentType::Transactions, Some(600_000)), + 220_000 + ); + // Single archive (state) always returns full size + assert_eq!(m.size_for_distance(SnapshotComponentType::State, Some(100)), 100); + // Missing component + assert_eq!(m.size_for_distance(SnapshotComponentType::Receipts, None), 0); + } + + #[test] + fn archive_descriptors_include_checksum_metadata() { + let mut components = BTreeMap::new(); + components.insert( + "state".to_string(), + ComponentManifest::Single(SingleArchive { + file: "state.tar.zst".to_string(), + size: 100, + blake3: Some("abc123".to_string()), + output_files: vec![OutputFileChecksum { + path: 
"db/mdbx.dat".to_string(), + size: 1000, + blake3: "s0".to_string(), + }], + }), + ); + components.insert( + "transactions".to_string(), + ComponentManifest::Chunked(ChunkedArchive { + blocks_per_file: 500_000, + total_blocks: 1_000_000, + chunk_sizes: vec![80_000, 120_000], + chunk_output_files: vec![ + vec![OutputFileChecksum { + path: "static_files/static_file_transactions_0_499999.bin".to_string(), + size: 111, + blake3: "h0".to_string(), + }], + vec![OutputFileChecksum { + path: "static_files/static_file_transactions_500000_999999.bin".to_string(), + size: 222, + blake3: "h1".to_string(), + }], + ], + }), + ); + + let m = SnapshotManifest { + block: 1_000_000, + chain_id: 1, + storage_version: 2, + timestamp: 0, + base_url: Some("https://example.com".to_string()), + components, + }; + + let state = m.archive_descriptors_for_distance(SnapshotComponentType::State, None); + assert_eq!(state.len(), 1); + assert_eq!(state[0].file_name, "state.tar.zst"); + assert_eq!(state[0].blake3.as_deref(), Some("abc123")); + assert_eq!(state[0].output_files.len(), 1); + + let tx = m.archive_descriptors_for_distance(SnapshotComponentType::Transactions, None); + assert_eq!(tx.len(), 2); + assert_eq!(tx[0].blake3, None); + assert_eq!(tx[1].blake3, None); + assert_eq!(tx[0].output_files[0].size, 111); + } + + #[test] + fn generate_manifest_includes_state_single_archive() { + let source = tempdir().unwrap(); + let output = tempdir().unwrap(); + let db_dir = source.path().join("db"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::write(db_dir.join("mdbx.dat"), b"state-data").unwrap(); + + let manifest = + generate_manifest(source.path(), output.path(), None, 0, 1, 500_000).unwrap(); + + let state = manifest.component(SnapshotComponentType::State).unwrap(); + let ComponentManifest::Single(state) = state else { + panic!("state should be a single archive") + }; + assert_eq!(state.file, "state.tar.zst"); + assert!(!state.output_files.is_empty()); + 
assert_eq!(state.output_files[0].path, "db/mdbx.dat"); + assert!(output.path().join("state.tar.zst").exists()); + } + + #[test] + fn generate_manifest_includes_rocksdb_single_archive_when_present() { + let source = tempdir().unwrap(); + let output = tempdir().unwrap(); + let db_dir = source.path().join("db"); + std::fs::create_dir_all(&db_dir).unwrap(); + std::fs::write(db_dir.join("mdbx.dat"), b"state-data").unwrap(); + let rocksdb_dir = source.path().join("rocksdb"); + std::fs::create_dir_all(&rocksdb_dir).unwrap(); + std::fs::write(rocksdb_dir.join("CURRENT"), b"MANIFEST-000001").unwrap(); + + let manifest = + generate_manifest(source.path(), output.path(), None, 0, 1, 500_000).unwrap(); + + let rocksdb = manifest.component(SnapshotComponentType::RocksdbIndices).unwrap(); + let ComponentManifest::Single(rocksdb) = rocksdb else { + panic!("rocksdb indices should be a single archive") + }; + assert_eq!(rocksdb.file, "rocksdb_indices.tar.zst"); + assert!(!rocksdb.output_files.is_empty()); + assert_eq!(rocksdb.output_files[0].path, "rocksdb/CURRENT"); + assert!(output.path().join("rocksdb_indices.tar.zst").exists()); + } +} diff --git a/crates/cli/commands/src/download/manifest_cmd.rs b/crates/cli/commands/src/download/manifest_cmd.rs new file mode 100644 index 0000000000..0dcdca1504 --- /dev/null +++ b/crates/cli/commands/src/download/manifest_cmd.rs @@ -0,0 +1,232 @@ +use crate::download::manifest::generate_manifest; +use clap::Parser; +use eyre::{Result, WrapErr}; +use reth_db::{mdbx::DatabaseArguments, open_db_read_only, tables, Database}; +use reth_db_api::transaction::DbTx; +use reth_stages_types::StageId; +use reth_static_file_types::DEFAULT_BLOCKS_PER_STATIC_FILE; +use std::{path::PathBuf, time::Instant}; +use tracing::{info, warn}; + +/// Generate modular chunk archives and a snapshot manifest from a source datadir. +/// +/// Archive naming convention: +/// - Chunked: `{component}-{start}-{end}.tar.zst` (e.g. 
`transactions-0-499999.tar.zst`) +#[derive(Debug, Parser)] +pub struct SnapshotManifestCommand { + /// Source datadir containing static files. + #[arg(long, short = 'd')] + source_datadir: PathBuf, + + /// Optional base URL where archives will be hosted. + #[arg(long)] + base_url: Option, + + /// Output directory where chunk archives and manifest.json are written. + #[arg(long, short = 'o')] + output_dir: PathBuf, + + /// Block number this snapshot was taken at. + /// + /// If omitted, this is inferred from the source datadir's `Finish` stage checkpoint. + #[arg(long)] + block: Option, + + /// Chain ID. + #[arg(long, default_value = "1")] + chain_id: u64, + + /// Blocks per archive file for chunked components. + /// + /// If omitted, this is inferred from header static file ranges in the source datadir. + #[arg(long)] + blocks_per_file: Option, +} + +impl SnapshotManifestCommand { + pub fn execute(self) -> Result<()> { + let block = match self.block { + Some(block) => block, + None => infer_snapshot_block(&self.source_datadir)?, + }; + let blocks_per_file = match self.blocks_per_file { + Some(blocks_per_file) => blocks_per_file, + None => infer_blocks_per_file(&self.source_datadir)?, + }; + + info!(target: "reth::cli", + dir = ?self.source_datadir, + output = ?self.output_dir, + block, + blocks_per_file, + "Packaging modular snapshot archives" + ); + let start = Instant::now(); + let manifest = generate_manifest( + &self.source_datadir, + &self.output_dir, + self.base_url.as_deref(), + block, + self.chain_id, + blocks_per_file, + )?; + + let num_components = manifest.components.len(); + let json = serde_json::to_string_pretty(&manifest)?; + let output = self.output_dir.join("manifest.json"); + reth_fs_util::write(&output, &json)?; + info!(target: "reth::cli", + path = ?output, + components = num_components, + block = manifest.block, + elapsed = ?start.elapsed(), + "Manifest written" + ); + + Ok(()) + } +} + +fn infer_snapshot_block(source_datadir: &std::path::Path) 
-> Result { + if let Ok(block) = infer_snapshot_block_from_db(source_datadir) { + return Ok(block); + } + + let block = infer_snapshot_block_from_headers(source_datadir)?; + warn!( + target: "reth::cli", + block, + "Could not read Finish stage checkpoint from source DB, using header static-file tip" + ); + Ok(block) +} + +fn infer_snapshot_block_from_db(source_datadir: &std::path::Path) -> Result { + let candidates = [source_datadir.join("db"), source_datadir.to_path_buf()]; + + for db_path in candidates { + if !db_path.exists() { + continue; + } + + let db = match open_db_read_only(&db_path, DatabaseArguments::default()) { + Ok(db) => db, + Err(_) => continue, + }; + + let tx = db.tx()?; + if let Some(checkpoint) = tx.get::(StageId::Finish.to_string())? { + return Ok(checkpoint.block_number); + } + } + + eyre::bail!( + "Could not infer --block from source DB (Finish checkpoint missing); pass --block manually" + ) +} + +fn infer_snapshot_block_from_headers(source_datadir: &std::path::Path) -> Result { + let max_end = header_ranges(source_datadir)? + .into_iter() + .map(|(_, end)| end) + .max() + .ok_or_else(|| eyre::eyre!("No header static files found to infer --block"))?; + Ok(max_end) +} + +fn infer_blocks_per_file(source_datadir: &std::path::Path) -> Result { + let mut inferred = None; + for (start, end) in header_ranges(source_datadir)? 
{ + let span = end.saturating_sub(start).saturating_add(1); + if span == 0 { + continue; + } + + if let Some(existing) = inferred { + if existing != span { + eyre::bail!( + "Inconsistent header static file ranges; pass --blocks-per-file manually" + ); + } + } else { + inferred = Some(span); + } + } + + inferred.ok_or_else(|| { + eyre::eyre!( + "Could not infer --blocks-per-file from header static files; pass it manually (default is {DEFAULT_BLOCKS_PER_STATIC_FILE})" + ) + }) +} + +fn header_ranges(source_datadir: &std::path::Path) -> Result> { + let static_files_dir = source_datadir.join("static_files"); + let static_files_dir = + if static_files_dir.exists() { static_files_dir } else { source_datadir.to_path_buf() }; + + let entries = std::fs::read_dir(&static_files_dir).wrap_err_with(|| { + format!("Failed to read static files directory: {}", static_files_dir.display()) + })?; + + let mut ranges = Vec::new(); + for entry in entries { + let entry = entry?; + let file_name = entry.file_name(); + let file_name = file_name.to_string_lossy(); + if let Some(range) = parse_headers_range(&file_name) { + ranges.push(range); + } + } + + Ok(ranges) +} + +fn parse_headers_range(file_name: &str) -> Option<(u64, u64)> { + let remainder = file_name.strip_prefix("static_file_headers_")?; + let (start, end_with_suffix) = remainder.split_once('_')?; + + let start = start.parse::().ok()?; + let end_digits: String = end_with_suffix.chars().take_while(|ch| ch.is_ascii_digit()).collect(); + let end = end_digits.parse::().ok()?; + + Some((start, end)) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + #[test] + fn parse_headers_range_works_with_suffixes() { + assert_eq!(parse_headers_range("static_file_headers_0_499999"), Some((0, 499_999))); + assert_eq!( + parse_headers_range("static_file_headers_500000_999999.jar"), + Some((500_000, 999_999)) + ); + assert_eq!(parse_headers_range("static_file_transactions_0_499999"), None); + } + + #[test] + fn 
infer_blocks_per_file_from_header_ranges() { + let dir = tempdir().unwrap(); + let sf = dir.path().join("static_files"); + std::fs::create_dir_all(&sf).unwrap(); + std::fs::write(sf.join("static_file_headers_0_499999"), []).unwrap(); + std::fs::write(sf.join("static_file_headers_500000_999999.jar"), []).unwrap(); + + assert_eq!(infer_blocks_per_file(dir.path()).unwrap(), 500_000); + } + + #[test] + fn infer_snapshot_block_from_headers_uses_max_end() { + let dir = tempdir().unwrap(); + let sf = dir.path().join("static_files"); + std::fs::create_dir_all(&sf).unwrap(); + std::fs::write(sf.join("static_file_headers_0_499999"), []).unwrap(); + std::fs::write(sf.join("static_file_headers_500000_999999"), []).unwrap(); + + assert_eq!(infer_snapshot_block_from_headers(dir.path()).unwrap(), 999_999); + } +} diff --git a/crates/cli/commands/src/download/mod.rs b/crates/cli/commands/src/download/mod.rs new file mode 100644 index 0000000000..ef13f150b0 --- /dev/null +++ b/crates/cli/commands/src/download/mod.rs @@ -0,0 +1,1891 @@ +pub mod config_gen; +pub mod manifest; +pub mod manifest_cmd; +mod tui; + +use crate::common::EnvironmentArgs; +use blake3::Hasher; +use clap::Parser; +use config_gen::{config_for_selections, write_config}; +use eyre::Result; +use futures::stream::{self, StreamExt}; +use lz4::Decoder; +use manifest::{ + ArchiveDescriptor, ComponentSelection, OutputFileChecksum, SnapshotComponentType, + SnapshotManifest, +}; +use reqwest::{blocking::Client as BlockingClient, header::RANGE, Client, StatusCode}; +use reth_chainspec::{EthChainSpec, EthereumHardfork, EthereumHardforks}; +use reth_cli::chainspec::ChainSpecParser; +use reth_db::{init_db, Database}; +use reth_db_api::transaction::DbTx; +use reth_fs_util as fs; +use reth_node_core::args::DefaultPruningValues; +use reth_prune_types::PruneMode; +use std::{ + borrow::Cow, + collections::BTreeMap, + fs::OpenOptions, + io::{self, BufWriter, Read, Write}, + path::{Path, PathBuf}, + sync::{ + atomic::{AtomicBool, 
AtomicU64, Ordering}, + Arc, OnceLock, + }, + time::{Duration, Instant}, +}; +use tar::Archive; +use tokio::task; +use tracing::{info, warn}; +use tui::{run_selector, SelectorOutput}; +use url::Url; +use zstd::stream::read::Decoder as ZstdDecoder; + +const BYTE_UNITS: [&str; 4] = ["B", "KB", "MB", "GB"]; +const MERKLE_BASE_URL: &str = "https://downloads.merkle.io"; +const EXTENSION_TAR_LZ4: &str = ".tar.lz4"; +const EXTENSION_TAR_ZSTD: &str = ".tar.zst"; +const DOWNLOAD_CACHE_DIR: &str = ".download-cache"; + +/// Maximum number of concurrent archive downloads. +const MAX_CONCURRENT_DOWNLOADS: usize = 8; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SelectionPreset { + Minimal, + Full, + Archive, +} + +struct ResolvedComponents { + selections: BTreeMap, + preset: Option, +} + +/// Global static download defaults +static DOWNLOAD_DEFAULTS: OnceLock = OnceLock::new(); + +/// Download configuration defaults +/// +/// Global defaults can be set via [`DownloadDefaults::try_init`]. +#[derive(Debug, Clone)] +pub struct DownloadDefaults { + /// List of available snapshot sources + pub available_snapshots: Vec>, + /// Default base URL for snapshots + pub default_base_url: Cow<'static, str>, + /// Default base URL for chain-aware snapshots. + /// + /// When set, the chain ID is appended to form the full URL: `{base_url}/{chain_id}`. + /// For example, given a base URL of `https://snapshots.example.com` and chain ID `1`, + /// the resulting URL would be `https://snapshots.example.com/1`. + /// + /// Falls back to [`default_base_url`](Self::default_base_url) when `None`. 
+ pub default_chain_aware_base_url: Option>, + /// Optional custom long help text that overrides the generated help + pub long_help: Option, +} + +impl DownloadDefaults { + /// Initialize the global download defaults with this configuration + pub fn try_init(self) -> Result<(), Self> { + DOWNLOAD_DEFAULTS.set(self) + } + + /// Get a reference to the global download defaults + pub fn get_global() -> &'static DownloadDefaults { + DOWNLOAD_DEFAULTS.get_or_init(DownloadDefaults::default_download_defaults) + } + + /// Default download configuration with defaults from merkle.io and publicnode + pub fn default_download_defaults() -> Self { + Self { + available_snapshots: vec![ + Cow::Borrowed("https://www.merkle.io/snapshots (default, mainnet archive)"), + Cow::Borrowed("https://publicnode.com/snapshots (full nodes & testnets)"), + ], + default_base_url: Cow::Borrowed(MERKLE_BASE_URL), + default_chain_aware_base_url: None, + long_help: None, + } + } + + /// Generates the long help text for the download URL argument using these defaults. + /// + /// If a custom long_help is set, it will be returned. Otherwise, help text is generated + /// from the available_snapshots list. 
+ pub fn long_help(&self) -> String { + if let Some(ref custom_help) = self.long_help { + return custom_help.clone(); + } + + let mut help = String::from( + "Specify a snapshot URL or let the command propose a default one.\n\nAvailable snapshot sources:\n", + ); + + for source in &self.available_snapshots { + help.push_str("- "); + help.push_str(source); + help.push('\n'); + } + + help.push_str( + "\nIf no URL is provided, the latest archive snapshot for the selected chain\nwill be proposed for download from ", + ); + help.push_str( + self.default_chain_aware_base_url.as_deref().unwrap_or(&self.default_base_url), + ); + help.push_str( + ".\n\nLocal file:// URLs are also supported for extracting snapshots from disk.", + ); + help + } + + /// Add a snapshot source to the list + pub fn with_snapshot(mut self, source: impl Into>) -> Self { + self.available_snapshots.push(source.into()); + self + } + + /// Replace all snapshot sources + pub fn with_snapshots(mut self, sources: Vec>) -> Self { + self.available_snapshots = sources; + self + } + + /// Set the default base URL, e.g. `https://downloads.merkle.io`. + pub fn with_base_url(mut self, url: impl Into>) -> Self { + self.default_base_url = url.into(); + self + } + + /// Set the default chain-aware base URL. + pub fn with_chain_aware_base_url(mut self, url: impl Into>) -> Self { + self.default_chain_aware_base_url = Some(url.into()); + self + } + + /// Builder: Set custom long help text, overriding the generated help + pub fn with_long_help(mut self, help: impl Into) -> Self { + self.long_help = Some(help.into()); + self + } +} + +impl Default for DownloadDefaults { + fn default() -> Self { + Self::default_download_defaults() + } +} + +/// CLI command that downloads snapshot archives and configures a reth node from them. +#[derive(Debug, Parser)] +pub struct DownloadCommand { + #[command(flatten)] + env: EnvironmentArgs, + + /// Custom URL to download a single snapshot archive (legacy mode). 
+ /// + /// When provided, downloads and extracts a single archive without component selection. + #[arg(long, short, long_help = DownloadDefaults::get_global().long_help())] + url: Option, + + /// URL to a snapshot manifest.json for modular component downloads. + /// + /// When provided, fetches this manifest instead of discovering it from the default + /// base URL. Useful for testing with custom or local manifests. + #[arg(long, value_name = "URL", conflicts_with = "url")] + manifest_url: Option, + + /// Local path to a snapshot manifest.json for modular component downloads. + #[arg(long, value_name = "PATH", conflicts_with_all = ["url", "manifest_url"])] + manifest_path: Option, + + /// Include transaction static files. + #[arg(long, conflicts_with_all = ["minimal", "full", "archive"])] + with_txs: bool, + + /// Include receipt static files. + #[arg(long, conflicts_with_all = ["minimal", "full", "archive"])] + with_receipts: bool, + + /// Include account and storage history static files. + #[arg(long, alias = "with-changesets", conflicts_with_all = ["minimal", "full", "archive"])] + with_state_history: bool, + + /// Download all available components (archive node, no pruning). + #[arg(long, alias = "all", conflicts_with_all = ["with_txs", "with_receipts", "with_state_history", "minimal", "full"])] + archive: bool, + + /// Download the minimal component set (same default as --non-interactive). + #[arg(long, conflicts_with_all = ["with_txs", "with_receipts", "with_state_history", "archive", "full"])] + minimal: bool, + + /// Download the full node component set (matches default full prune settings). + #[arg(long, conflicts_with_all = ["with_txs", "with_receipts", "with_state_history", "archive", "minimal"])] + full: bool, + + /// Skip optional RocksDB indices even when archive components are selected. + /// + /// This affects `--archive`/`--all` and TUI archive preset (`a`). 
+ #[arg(long, conflicts_with = "url")] + without_rocksdb: bool, + + /// Skip interactive component selection. Downloads the minimal set + /// (state + headers + transactions + changesets) unless explicit --with-* flags narrow it. + #[arg(long, short = 'y')] + non_interactive: bool, + + /// Use resumable two-phase downloads (download to disk first, then extract). + /// + /// Archives are downloaded to a .part file with HTTP Range resume support + /// before extraction. Slower but tolerates network interruptions without + /// restarting. By default, archives stream directly into the extractor. + #[arg(long)] + resumable: bool, + + /// Maximum number of concurrent modular archive workers. + #[arg(long, default_value_t = MAX_CONCURRENT_DOWNLOADS)] + download_concurrency: usize, +} + +impl> DownloadCommand { + pub async fn execute(self) -> Result<()> { + let chain = self.env.chain.chain(); + let chain_id = chain.id(); + let data_dir = self.env.datadir.clone().resolve_datadir(chain); + fs::create_dir_all(&data_dir)?; + + // Legacy single-URL mode: download one archive and extract it + if let Some(url) = self.url { + info!(target: "reth::cli", + dir = ?data_dir.data_dir(), + url = %url, + "Starting snapshot download and extraction" + ); + + stream_and_extract(&url, data_dir.data_dir(), None, self.resumable).await?; + info!(target: "reth::cli", "Snapshot downloaded and extracted successfully"); + + return Ok(()); + } + + // Modular download: fetch manifest and select components + let manifest_source = self.resolve_manifest_source(chain_id); + + info!(target: "reth::cli", source = %manifest_source, "Fetching snapshot manifest"); + let mut manifest = fetch_manifest_from_source(&manifest_source).await?; + manifest.base_url = Some(resolve_manifest_base_url(&manifest, &manifest_source)?); + + info!(target: "reth::cli", + block = manifest.block, + chain_id = manifest.chain_id, + storage_version = %manifest.storage_version, + components = manifest.components.len(), + "Loaded 
snapshot manifest" + ); + + let ResolvedComponents { mut selections, preset } = self.resolve_components(&manifest)?; + + if matches!(preset, Some(SelectionPreset::Archive)) { + inject_archive_only_components(&mut selections, &manifest, !self.without_rocksdb); + } + + // Collect all archive descriptors across selected components. + let target_dir = data_dir.data_dir(); + let mut all_downloads: Vec = Vec::new(); + for (ty, sel) in &selections { + let distance = match sel { + ComponentSelection::All => None, + ComponentSelection::Distance(d) => Some(*d), + ComponentSelection::None => continue, + }; + let descriptors = manifest.archive_descriptors_for_distance(*ty, distance); + let name = ty.display_name().to_string(); + + if !descriptors.is_empty() { + info!(target: "reth::cli", + component = %name, + archives = descriptors.len(), + selection = %sel, + "Queued component for download" + ); + } + + for descriptor in descriptors { + if descriptor.output_files.is_empty() { + eyre::bail!( + "Invalid modular manifest: {} is missing plain output checksum metadata", + descriptor.file_name + ); + } + all_downloads.push(PlannedArchive { + ty: *ty, + component: name.clone(), + archive: descriptor, + }); + } + } + + all_downloads.sort_by(|a, b| { + archive_priority_rank(a.ty) + .cmp(&archive_priority_rank(b.ty)) + .then_with(|| a.component.cmp(&b.component)) + .then_with(|| a.archive.file_name.cmp(&b.archive.file_name)) + }); + + let download_cache_dir = if self.resumable { + let dir = target_dir.join(DOWNLOAD_CACHE_DIR); + fs::create_dir_all(&dir)?; + Some(dir) + } else { + None + }; + + let total_archives = all_downloads.len(); + let total_size: u64 = selections + .iter() + .map(|(ty, sel)| match sel { + ComponentSelection::All => manifest.size_for_distance(*ty, None), + ComponentSelection::Distance(d) => manifest.size_for_distance(*ty, Some(*d)), + ComponentSelection::None => 0, + }) + .sum(); + + let startup_summary = summarize_download_startup(&all_downloads, target_dir)?; + 
info!(target: "reth::cli", + reusable = startup_summary.reusable, + needs_download = startup_summary.needs_download, + "Startup integrity summary (plain output files)" + ); + + info!(target: "reth::cli", + archives = total_archives, + total = %DownloadProgress::format_size(total_size), + "Downloading all archives" + ); + + let shared = SharedProgress::new(total_size, total_archives as u64); + let progress_handle = spawn_progress_display(Arc::clone(&shared)); + + let target = target_dir.to_path_buf(); + let cache_dir = download_cache_dir; + let resumable = self.resumable; + let download_concurrency = self.download_concurrency.max(1); + let results: Vec> = stream::iter(all_downloads) + .map(|planned| { + let dir = target.clone(); + let cache = cache_dir.clone(); + let sp = Arc::clone(&shared); + async move { + process_modular_archive(planned, &dir, cache.as_deref(), Some(sp), resumable) + .await?; + Ok(()) + } + }) + .buffer_unordered(download_concurrency) + .collect() + .await; + + shared.done.store(true, Ordering::Relaxed); + let _ = progress_handle.await; + + // Check for errors + for result in results { + result?; + } + + // Generate reth.toml and set prune checkpoints + let config = + config_for_selections(&selections, &manifest, preset, Some(self.env.chain.as_ref())); + if write_config(&config, target_dir)? 
{ + let desc = config_gen::describe_prune_config(&config); + info!(target: "reth::cli", "{}", desc.join(", ")); + } + + // Open the DB to write checkpoints + let db_path = data_dir.db(); + let db = init_db(&db_path, self.env.db.database_args())?; + + // Write prune checkpoints to the DB so the pruner knows data before the + // snapshot block is already in the expected pruned state + let should_write_prune = config.prune.segments != Default::default(); + let should_reset_indices = should_reset_index_stage_checkpoints(&selections); + if should_write_prune || should_reset_indices { + let tx = db.tx_mut()?; + + if should_write_prune { + config_gen::write_prune_checkpoints_tx(&tx, &config, manifest.block)?; + } + + // Reset stage checkpoints for history indexing stages only if RocksDB + // indices weren't downloaded. When archive snapshots include the + // optional RocksDB indices component, we preserve source checkpoints. + if should_reset_indices { + config_gen::reset_index_stage_checkpoints_tx(&tx)?; + } + + tx.commit()?; + } + + info!(target: "reth::cli", "Snapshot download complete. Run `reth node` to start syncing."); + + Ok(()) + } + + /// Determines which components to download based on CLI flags or interactive selection. 
+ fn resolve_components(&self, manifest: &SnapshotManifest) -> Result { + let available = |ty: SnapshotComponentType| manifest.component(ty).is_some(); + + // --archive/--all: everything available as All + if self.archive { + return Ok(ResolvedComponents { + selections: SnapshotComponentType::ALL + .iter() + .copied() + .filter(|ty| available(*ty)) + .filter(|ty| { + !self.without_rocksdb || *ty != SnapshotComponentType::RocksdbIndices + }) + .map(|ty| (ty, ComponentSelection::All)) + .collect(), + preset: Some(SelectionPreset::Archive), + }); + } + + if self.full { + return Ok(ResolvedComponents { + selections: self.full_preset_selections(manifest), + preset: Some(SelectionPreset::Full), + }); + } + + if self.minimal { + return Ok(ResolvedComponents { + selections: self.minimal_preset_selections(manifest), + preset: Some(SelectionPreset::Minimal), + }); + } + + let has_explicit_flags = self.with_txs || self.with_receipts || self.with_state_history; + + if has_explicit_flags { + let mut selections = BTreeMap::new(); + // Required components always All + if available(SnapshotComponentType::State) { + selections.insert(SnapshotComponentType::State, ComponentSelection::All); + } + if available(SnapshotComponentType::Headers) { + selections.insert(SnapshotComponentType::Headers, ComponentSelection::All); + } + if self.with_txs && available(SnapshotComponentType::Transactions) { + selections.insert(SnapshotComponentType::Transactions, ComponentSelection::All); + } + if self.with_receipts && available(SnapshotComponentType::Receipts) { + selections.insert(SnapshotComponentType::Receipts, ComponentSelection::All); + } + if self.with_state_history { + if available(SnapshotComponentType::AccountChangesets) { + selections + .insert(SnapshotComponentType::AccountChangesets, ComponentSelection::All); + } + if available(SnapshotComponentType::StorageChangesets) { + selections + .insert(SnapshotComponentType::StorageChangesets, ComponentSelection::All); + } + } + return 
Ok(ResolvedComponents { selections, preset: None }); + } + + if self.non_interactive { + return Ok(ResolvedComponents { + selections: self.minimal_preset_selections(manifest), + preset: Some(SelectionPreset::Minimal), + }); + } + + // Interactive TUI + let full_preset = self.full_preset_selections(manifest); + let SelectorOutput { selections, preset } = run_selector(manifest.clone(), &full_preset)?; + let selected = + selections.into_iter().filter(|(_, sel)| *sel != ComponentSelection::None).collect(); + + Ok(ResolvedComponents { selections: selected, preset }) + } + + fn minimal_preset_selections( + &self, + manifest: &SnapshotManifest, + ) -> BTreeMap { + SnapshotComponentType::ALL + .iter() + .copied() + .filter(|ty| manifest.component(*ty).is_some()) + .map(|ty| (ty, ty.minimal_selection())) + .collect() + } + + fn full_preset_selections( + &self, + manifest: &SnapshotManifest, + ) -> BTreeMap { + let mut selections = BTreeMap::new(); + + for ty in [ + SnapshotComponentType::State, + SnapshotComponentType::Headers, + SnapshotComponentType::Transactions, + SnapshotComponentType::Receipts, + SnapshotComponentType::AccountChangesets, + SnapshotComponentType::StorageChangesets, + SnapshotComponentType::TransactionSenders, + SnapshotComponentType::RocksdbIndices, + ] { + if manifest.component(ty).is_none() { + continue; + } + + let selection = self.full_selection_for_component(ty, manifest.block); + if selection != ComponentSelection::None { + selections.insert(ty, selection); + } + } + + selections + } + + fn full_selection_for_component( + &self, + ty: SnapshotComponentType, + snapshot_block: u64, + ) -> ComponentSelection { + let defaults = DefaultPruningValues::get_global(); + match ty { + SnapshotComponentType::State | SnapshotComponentType::Headers => { + ComponentSelection::All + } + SnapshotComponentType::Transactions => { + if defaults.full_bodies_history_use_pre_merge { + match self + .env + .chain + .ethereum_fork_activation(EthereumHardfork::Paris) + 
.block_number() + { + Some(paris) if snapshot_block >= paris => { + ComponentSelection::Distance(snapshot_block - paris + 1) + } + Some(_) => ComponentSelection::None, + None => ComponentSelection::All, + } + } else { + selection_from_prune_mode( + defaults.full_prune_modes.bodies_history, + snapshot_block, + ) + } + } + SnapshotComponentType::Receipts => { + selection_from_prune_mode(defaults.full_prune_modes.receipts, snapshot_block) + } + SnapshotComponentType::AccountChangesets => { + selection_from_prune_mode(defaults.full_prune_modes.account_history, snapshot_block) + } + SnapshotComponentType::StorageChangesets => { + selection_from_prune_mode(defaults.full_prune_modes.storage_history, snapshot_block) + } + SnapshotComponentType::TransactionSenders => { + selection_from_prune_mode(defaults.full_prune_modes.sender_recovery, snapshot_block) + } + // Keep hidden by default in full mode; if users want indices they can use archive. + SnapshotComponentType::RocksdbIndices => ComponentSelection::None, + } + } + + fn resolve_manifest_source(&self, chain_id: u64) -> String { + if let Some(path) = &self.manifest_path { + return path.display().to_string(); + } + + match &self.manifest_url { + Some(url) => url.clone(), + None => { + let base_url = get_base_url(chain_id); + format!("{base_url}/manifest.json") + } + } + } +} + +fn selection_from_prune_mode(mode: Option, snapshot_block: u64) -> ComponentSelection { + match mode { + None => ComponentSelection::All, + Some(PruneMode::Full) => ComponentSelection::None, + Some(PruneMode::Distance(d)) => ComponentSelection::Distance(d), + Some(PruneMode::Before(block)) => { + if snapshot_block >= block { + ComponentSelection::Distance(snapshot_block - block + 1) + } else { + ComponentSelection::None + } + } + } +} + +/// If all data components (txs, receipts, changesets) are `All`, automatically +/// include hidden archive-only components when available in the manifest. 
+fn inject_archive_only_components( + selections: &mut BTreeMap, + manifest: &SnapshotManifest, + include_rocksdb: bool, +) { + let is_all = + |ty: SnapshotComponentType| selections.get(&ty).copied() == Some(ComponentSelection::All); + + let is_archive = is_all(SnapshotComponentType::Transactions) && + is_all(SnapshotComponentType::Receipts) && + is_all(SnapshotComponentType::AccountChangesets) && + is_all(SnapshotComponentType::StorageChangesets); + + if !is_archive { + return; + } + + for component in + [SnapshotComponentType::TransactionSenders, SnapshotComponentType::RocksdbIndices] + { + if component == SnapshotComponentType::RocksdbIndices && !include_rocksdb { + continue; + } + + if manifest.component(component).is_some() { + selections.insert(component, ComponentSelection::All); + } + } +} + +fn should_reset_index_stage_checkpoints( + selections: &BTreeMap, +) -> bool { + !matches!(selections.get(&SnapshotComponentType::RocksdbIndices), Some(ComponentSelection::All)) +} + +impl DownloadCommand { + /// Returns the underlying chain being used to run this command + pub fn chain_spec(&self) -> Option<&Arc> { + Some(&self.env.chain) + } +} + +/// Tracks download progress and throttles display updates to every 100ms. 
+pub(crate) struct DownloadProgress { + downloaded: u64, + total_size: u64, + last_displayed: Instant, + started_at: Instant, +} + +#[derive(Debug, Clone)] +struct PlannedArchive { + ty: SnapshotComponentType, + component: String, + archive: ArchiveDescriptor, +} + +const fn archive_priority_rank(ty: SnapshotComponentType) -> u8 { + match ty { + SnapshotComponentType::State => 0, + SnapshotComponentType::RocksdbIndices => 1, + _ => 2, + } +} + +#[derive(Debug, Default, Clone, Copy)] +struct DownloadStartupSummary { + reusable: usize, + needs_download: usize, +} + +fn summarize_download_startup( + all_downloads: &[PlannedArchive], + target_dir: &Path, +) -> Result { + let mut summary = DownloadStartupSummary::default(); + + for planned in all_downloads { + if verify_output_files(target_dir, &planned.archive.output_files)? { + summary.reusable += 1; + } else { + summary.needs_download += 1; + } + } + + Ok(summary) +} + +impl DownloadProgress { + /// Creates new progress tracker with given total size + fn new(total_size: u64) -> Self { + let now = Instant::now(); + Self { downloaded: 0, total_size, last_displayed: now, started_at: now } + } + + /// Converts bytes to human readable format (B, KB, MB, GB) + pub(crate) fn format_size(size: u64) -> String { + let mut size = size as f64; + let mut unit_index = 0; + + while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 { + size /= 1024.0; + unit_index += 1; + } + + format!("{:.2} {}", size, BYTE_UNITS[unit_index]) + } + + /// Format duration as human readable string + fn format_duration(duration: Duration) -> String { + let secs = duration.as_secs(); + if secs < 60 { + format!("{secs}s") + } else if secs < 3600 { + format!("{}m {}s", secs / 60, secs % 60) + } else { + format!("{}h {}m", secs / 3600, (secs % 3600) / 60) + } + } + + /// Updates progress bar (for single-archive legacy downloads) + fn update(&mut self, chunk_size: u64) -> Result<()> { + self.downloaded += chunk_size; + + if self.last_displayed.elapsed() 
>= Duration::from_millis(100) { + let formatted_downloaded = Self::format_size(self.downloaded); + let formatted_total = Self::format_size(self.total_size); + let progress = (self.downloaded as f64 / self.total_size as f64) * 100.0; + + let elapsed = self.started_at.elapsed(); + let eta = if self.downloaded > 0 { + let remaining = self.total_size.saturating_sub(self.downloaded); + let speed = self.downloaded as f64 / elapsed.as_secs_f64(); + if speed > 0.0 { + Duration::from_secs_f64(remaining as f64 / speed) + } else { + Duration::ZERO + } + } else { + Duration::ZERO + }; + let eta_str = Self::format_duration(eta); + + print!( + "\rDownloading and extracting... {progress:.2}% ({formatted_downloaded} / {formatted_total}) ETA: {eta_str} ", + ); + io::stdout().flush()?; + self.last_displayed = Instant::now(); + } + + Ok(()) + } +} + +/// Shared progress counter for parallel downloads. +/// +/// Each download thread atomically increments `downloaded`. A single display +/// task on the main thread reads the counter periodically and prints one +/// aggregated progress line. +struct SharedProgress { + downloaded: AtomicU64, + total_size: u64, + total_archives: u64, + archives_done: AtomicU64, + done: AtomicBool, +} + +impl SharedProgress { + fn new(total_size: u64, total_archives: u64) -> Arc { + Arc::new(Self { + downloaded: AtomicU64::new(0), + total_size, + total_archives, + archives_done: AtomicU64::new(0), + done: AtomicBool::new(false), + }) + } + + fn add(&self, bytes: u64) { + self.downloaded.fetch_add(bytes, Ordering::Relaxed); + } + + fn archive_done(&self) { + self.archives_done.fetch_add(1, Ordering::Relaxed); + } +} + +/// Spawns a background task that prints aggregated download progress. +/// Returns a handle; drop it (or call `.abort()`) to stop. 
+fn spawn_progress_display(progress: Arc) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + let started_at = Instant::now(); + let mut interval = tokio::time::interval(Duration::from_secs(3)); + interval.tick().await; // first tick is immediate, skip it + loop { + interval.tick().await; + + if progress.done.load(Ordering::Relaxed) { + break; + } + + let downloaded = progress.downloaded.load(Ordering::Relaxed); + let total = progress.total_size; + if total == 0 { + continue; + } + + let done = progress.archives_done.load(Ordering::Relaxed); + let all = progress.total_archives; + let pct = (downloaded as f64 / total as f64) * 100.0; + let dl = DownloadProgress::format_size(downloaded); + let tot = DownloadProgress::format_size(total); + + let elapsed = started_at.elapsed(); + let remaining = total.saturating_sub(downloaded); + + if remaining == 0 { + // Downloads done, waiting for extraction + info!(target: "reth::cli", + archives = format_args!("{done}/{all}"), + downloaded = %dl, + "Extracting remaining archives" + ); + } else { + let eta = if downloaded > 0 { + let speed = downloaded as f64 / elapsed.as_secs_f64(); + if speed > 0.0 { + DownloadProgress::format_duration(Duration::from_secs_f64( + remaining as f64 / speed, + )) + } else { + "??".to_string() + } + } else { + "??".to_string() + }; + + info!(target: "reth::cli", + archives = format_args!("{done}/{all}"), + progress = format_args!("{pct:.1}%"), + downloaded = %dl, + total = %tot, + eta = %eta, + "Downloading" + ); + } + } + + // Final line + let downloaded = progress.downloaded.load(Ordering::Relaxed); + let dl = DownloadProgress::format_size(downloaded); + let tot = DownloadProgress::format_size(progress.total_size); + let elapsed = DownloadProgress::format_duration(started_at.elapsed()); + info!(target: "reth::cli", + downloaded = %dl, + total = %tot, + elapsed = %elapsed, + "Downloads complete" + ); + }) +} + +/// Adapter to track progress while reading (used for extraction in legacy 
path) +struct ProgressReader { + reader: R, + progress: DownloadProgress, +} + +impl ProgressReader { + fn new(reader: R, total_size: u64) -> Self { + Self { reader, progress: DownloadProgress::new(total_size) } + } +} + +impl Read for ProgressReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let bytes = self.reader.read(buf)?; + if bytes > 0 && + let Err(e) = self.progress.update(bytes as u64) + { + return Err(io::Error::other(e)); + } + Ok(bytes) + } +} + +/// Supported compression formats for snapshots +#[derive(Debug, Clone, Copy)] +enum CompressionFormat { + Lz4, + Zstd, +} + +impl CompressionFormat { + /// Detect compression format from file extension + fn from_url(url: &str) -> Result { + let path = + Url::parse(url).map(|u| u.path().to_string()).unwrap_or_else(|_| url.to_string()); + + if path.ends_with(EXTENSION_TAR_LZ4) { + Ok(Self::Lz4) + } else if path.ends_with(EXTENSION_TAR_ZSTD) { + Ok(Self::Zstd) + } else { + Err(eyre::eyre!( + "Unsupported file format. Expected .tar.lz4 or .tar.zst, got: {}", + path + )) + } + } +} + +/// Extracts a compressed tar archive to the target directory with progress tracking. +fn extract_archive( + reader: R, + total_size: u64, + format: CompressionFormat, + target_dir: &Path, +) -> Result<()> { + let progress_reader = ProgressReader::new(reader, total_size); + + match format { + CompressionFormat::Lz4 => { + let decoder = Decoder::new(progress_reader)?; + Archive::new(decoder).unpack(target_dir)?; + } + CompressionFormat::Zstd => { + let decoder = ZstdDecoder::new(progress_reader)?; + Archive::new(decoder).unpack(target_dir)?; + } + } + + println!(); + Ok(()) +} + +/// Extracts a compressed tar archive without progress tracking. 
+fn extract_archive_raw( + reader: R, + format: CompressionFormat, + target_dir: &Path, +) -> Result<()> { + match format { + CompressionFormat::Lz4 => { + Archive::new(Decoder::new(reader)?).unpack(target_dir)?; + } + CompressionFormat::Zstd => { + Archive::new(ZstdDecoder::new(reader)?).unpack(target_dir)?; + } + } + Ok(()) +} + +/// Extracts a snapshot from a local file. +fn extract_from_file(path: &Path, format: CompressionFormat, target_dir: &Path) -> Result<()> { + let file = std::fs::File::open(path)?; + let total_size = file.metadata()?.len(); + info!(target: "reth::cli", + file = %path.display(), + size = %DownloadProgress::format_size(total_size), + "Extracting local archive" + ); + let start = Instant::now(); + extract_archive(file, total_size, format, target_dir)?; + info!(target: "reth::cli", + file = %path.display(), + elapsed = %DownloadProgress::format_duration(start.elapsed()), + "Local extraction complete" + ); + Ok(()) +} + +const MAX_DOWNLOAD_RETRIES: u32 = 10; +const RETRY_BACKOFF_SECS: u64 = 5; + +/// Wrapper that tracks download progress while writing data. +/// Used with [`io::copy`] to display progress during downloads. +struct ProgressWriter { + inner: W, + progress: DownloadProgress, +} + +impl Write for ProgressWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let n = self.inner.write(buf)?; + let _ = self.progress.update(n as u64); + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +/// Wrapper that bumps a shared atomic counter while writing data. +/// Used for parallel downloads where a single display task shows aggregated progress. 
+struct SharedProgressWriter { + inner: W, + progress: Arc, +} + +impl Write for SharedProgressWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let n = self.inner.write(buf)?; + self.progress.add(n as u64); + Ok(n) + } + + fn flush(&mut self) -> io::Result<()> { + self.inner.flush() + } +} + +/// Wrapper that bumps a shared atomic counter while reading data. +/// Used for streaming downloads where a single display task shows aggregated progress. +struct SharedProgressReader { + inner: R, + progress: Arc, +} + +impl Read for SharedProgressReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let n = self.inner.read(buf)?; + self.progress.add(n as u64); + Ok(n) + } +} + +/// Downloads a file with resume support using HTTP Range requests. +/// Automatically retries on failure, resuming from where it left off. +/// Returns the path to the downloaded file and its total size. +/// +/// When `shared` is provided, progress is reported to the shared counter +/// (for parallel downloads). Otherwise uses a local progress bar. 
+fn resumable_download( + url: &str, + target_dir: &Path, + shared: Option<&Arc>, +) -> Result<(PathBuf, u64)> { + let file_name = Url::parse(url) + .ok() + .and_then(|u| u.path_segments()?.next_back().map(|s| s.to_string())) + .unwrap_or_else(|| "snapshot.tar".to_string()); + + let final_path = target_dir.join(&file_name); + let part_path = target_dir.join(format!("{file_name}.part")); + + let quiet = shared.is_some(); + + if !quiet { + info!(target: "reth::cli", file = %file_name, "Connecting to download server"); + } + let client = BlockingClient::builder().timeout(Duration::from_secs(30)).build()?; + + let mut total_size: Option = None; + let mut last_error: Option = None; + + let finalize_download = |size: u64| -> Result<(PathBuf, u64)> { + fs::rename(&part_path, &final_path)?; + if !quiet { + info!(target: "reth::cli", file = %file_name, "Download complete"); + } + Ok((final_path.clone(), size)) + }; + + for attempt in 1..=MAX_DOWNLOAD_RETRIES { + let existing_size = fs::metadata(&part_path).map(|m| m.len()).unwrap_or(0); + + if let Some(total) = total_size && + existing_size >= total + { + return finalize_download(total); + } + + if attempt > 1 { + info!(target: "reth::cli", + file = %file_name, + "Retry attempt {}/{} - resuming from {} bytes", + attempt, MAX_DOWNLOAD_RETRIES, existing_size + ); + } + + let mut request = client.get(url); + if existing_size > 0 { + request = request.header(RANGE, format!("bytes={existing_size}-")); + if !quiet && attempt == 1 { + info!(target: "reth::cli", file = %file_name, "Resuming from {} bytes", existing_size); + } + } + + let response = match request.send().and_then(|r| r.error_for_status()) { + Ok(r) => r, + Err(e) => { + last_error = Some(e.into()); + if attempt < MAX_DOWNLOAD_RETRIES { + info!(target: "reth::cli", + file = %file_name, + "Download failed, retrying in {RETRY_BACKOFF_SECS}s..." 
+ ); + std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); + } + continue; + } + }; + + let is_partial = response.status() == StatusCode::PARTIAL_CONTENT; + + let size = if is_partial { + response + .headers() + .get("Content-Range") + .and_then(|v| v.to_str().ok()) + .and_then(|v| v.split('/').next_back()) + .and_then(|v| v.parse().ok()) + } else { + response.content_length() + }; + + if total_size.is_none() { + total_size = size; + if !quiet && let Some(s) = size { + info!(target: "reth::cli", + file = %file_name, + size = %DownloadProgress::format_size(s), + "Downloading" + ); + } + } + + let current_total = total_size.ok_or_else(|| { + eyre::eyre!("Server did not provide Content-Length or Content-Range header") + })?; + + let file = if is_partial && existing_size > 0 { + OpenOptions::new() + .append(true) + .open(&part_path) + .map_err(|e| fs::FsPathError::open(e, &part_path))? + } else { + fs::create_file(&part_path)? + }; + + let start_offset = if is_partial { existing_size } else { 0 }; + let mut reader = response; + + let copy_result; + let flush_result; + + if let Some(sp) = shared { + // Parallel path: bump shared atomic counter + if start_offset > 0 { + sp.add(start_offset); + } + let mut writer = + SharedProgressWriter { inner: BufWriter::new(file), progress: Arc::clone(sp) }; + copy_result = io::copy(&mut reader, &mut writer); + flush_result = writer.inner.flush(); + } else { + // Legacy single-download path: local progress bar + let mut progress = DownloadProgress::new(current_total); + progress.downloaded = start_offset; + let mut writer = ProgressWriter { inner: BufWriter::new(file), progress }; + copy_result = io::copy(&mut reader, &mut writer); + flush_result = writer.inner.flush(); + println!(); + } + + if let Err(e) = copy_result.and(flush_result) { + last_error = Some(e.into()); + if attempt < MAX_DOWNLOAD_RETRIES { + info!(target: "reth::cli", + file = %file_name, + "Download interrupted, retrying in {RETRY_BACKOFF_SECS}s..." 
+ ); + std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); + } + continue; + } + + return finalize_download(current_total); + } + + Err(last_error + .unwrap_or_else(|| eyre::eyre!("Download failed after {} attempts", MAX_DOWNLOAD_RETRIES))) +} + +/// Streams a remote archive directly into the extractor without writing to disk. +/// +/// On failure, retries from scratch up to [`MAX_DOWNLOAD_RETRIES`] times. +fn streaming_download_and_extract( + url: &str, + format: CompressionFormat, + target_dir: &Path, + shared: Option<&Arc>, +) -> Result<()> { + let quiet = shared.is_some(); + let mut last_error: Option = None; + + for attempt in 1..=MAX_DOWNLOAD_RETRIES { + if attempt > 1 { + info!(target: "reth::cli", + url = %url, + attempt, + max = MAX_DOWNLOAD_RETRIES, + "Retrying streaming download from scratch" + ); + } + + let client = BlockingClient::builder().connect_timeout(Duration::from_secs(30)).build()?; + + let response = match client.get(url).send().and_then(|r| r.error_for_status()) { + Ok(r) => r, + Err(e) => { + last_error = Some(e.into()); + if attempt < MAX_DOWNLOAD_RETRIES { + std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); + } + continue; + } + }; + + if !quiet && let Some(size) = response.content_length() { + info!(target: "reth::cli", + url = %url, + size = %DownloadProgress::format_size(size), + "Streaming archive" + ); + } + + let result = if let Some(sp) = shared { + let reader = SharedProgressReader { inner: response, progress: Arc::clone(sp) }; + extract_archive_raw(reader, format, target_dir) + } else { + extract_archive_raw(response, format, target_dir) + }; + + match result { + Ok(()) => return Ok(()), + Err(e) => { + last_error = Some(e); + if attempt < MAX_DOWNLOAD_RETRIES { + std::thread::sleep(Duration::from_secs(RETRY_BACKOFF_SECS)); + } + } + } + } + + Err(last_error.unwrap_or_else(|| { + eyre::eyre!("Streaming download failed after {MAX_DOWNLOAD_RETRIES} attempts") + })) +} + +/// Fetches the snapshot from a remote 
URL with resume support, then extracts it. +fn download_and_extract( + url: &str, + format: CompressionFormat, + target_dir: &Path, + shared: Option<&Arc>, +) -> Result<()> { + let quiet = shared.is_some(); + let (downloaded_path, total_size) = resumable_download(url, target_dir, shared)?; + + let file_name = + downloaded_path.file_name().map(|f| f.to_string_lossy().to_string()).unwrap_or_default(); + + if !quiet { + info!(target: "reth::cli", + file = %file_name, + size = %DownloadProgress::format_size(total_size), + "Extracting archive" + ); + } + let file = fs::open(&downloaded_path)?; + + if quiet { + // Skip progress tracking for extraction in parallel mode + extract_archive_raw(file, format, target_dir)?; + } else { + extract_archive(file, total_size, format, target_dir)?; + info!(target: "reth::cli", + file = %file_name, + "Extraction complete" + ); + } + + fs::remove_file(&downloaded_path)?; + + if let Some(sp) = shared { + sp.archive_done(); + } + + Ok(()) +} + +/// Downloads and extracts a snapshot, blocking until finished. +/// +/// Supports `file://` URLs for local files and HTTP(S) URLs for remote downloads. +/// When `resumable` is true, downloads to a `.part` file first with HTTP Range resume +/// support. Otherwise streams directly into the extractor. 
+fn blocking_download_and_extract( + url: &str, + target_dir: &Path, + shared: Option>, + resumable: bool, +) -> Result<()> { + let format = CompressionFormat::from_url(url)?; + + if let Ok(parsed_url) = Url::parse(url) && + parsed_url.scheme() == "file" + { + let file_path = parsed_url + .to_file_path() + .map_err(|_| eyre::eyre!("Invalid file:// URL path: {}", url))?; + let result = extract_from_file(&file_path, format, target_dir); + if result.is_ok() && + let Some(sp) = shared + { + sp.archive_done(); + } + result + } else if resumable { + download_and_extract(url, format, target_dir, shared.as_ref()) + } else { + let result = streaming_download_and_extract(url, format, target_dir, shared.as_ref()); + if result.is_ok() && + let Some(sp) = shared + { + sp.archive_done(); + } + result + } +} + +/// Downloads and extracts a snapshot archive asynchronously. +/// +/// When `shared` is provided, download progress is reported to the shared +/// counter for aggregated display. Otherwise uses a local progress bar. +/// When `resumable` is true, uses two-phase download with `.part` files. 
+async fn stream_and_extract( + url: &str, + target_dir: &Path, + shared: Option>, + resumable: bool, +) -> Result<()> { + let target_dir = target_dir.to_path_buf(); + let url = url.to_string(); + task::spawn_blocking(move || { + blocking_download_and_extract(&url, &target_dir, shared, resumable) + }) + .await??; + + Ok(()) +} + +async fn process_modular_archive( + planned: PlannedArchive, + target_dir: &Path, + cache_dir: Option<&Path>, + shared: Option>, + resumable: bool, +) -> Result<()> { + let target_dir = target_dir.to_path_buf(); + let cache_dir = cache_dir.map(Path::to_path_buf); + + task::spawn_blocking(move || { + blocking_process_modular_archive( + &planned, + &target_dir, + cache_dir.as_deref(), + shared, + resumable, + ) + }) + .await??; + + Ok(()) +} + +fn blocking_process_modular_archive( + planned: &PlannedArchive, + target_dir: &Path, + cache_dir: Option<&Path>, + shared: Option>, + resumable: bool, +) -> Result<()> { + let archive = &planned.archive; + if verify_output_files(target_dir, &archive.output_files)? 
{ + if let Some(sp) = &shared { + sp.add(archive.size); + sp.archive_done(); + } + info!(target: "reth::cli", file = %archive.file_name, component = %planned.component, "Skipping already verified plain files"); + return Ok(()); + } + + let format = CompressionFormat::from_url(&archive.file_name)?; + for attempt in 1..=MAX_DOWNLOAD_RETRIES { + cleanup_output_files(target_dir, &archive.output_files); + + if resumable { + let cache_dir = cache_dir.ok_or_else(|| eyre::eyre!("Missing cache directory"))?; + let archive_path = cache_dir.join(&archive.file_name); + let part_path = cache_dir.join(format!("{}.part", archive.file_name)); + let (downloaded_path, _downloaded_size) = + resumable_download(&archive.url, cache_dir, shared.as_ref())?; + let file = fs::open(&downloaded_path)?; + extract_archive_raw(file, format, target_dir)?; + let _ = fs::remove_file(&archive_path); + let _ = fs::remove_file(&part_path); + } else { + streaming_download_and_extract(&archive.url, format, target_dir, shared.as_ref())?; + } + + if verify_output_files(target_dir, &archive.output_files)? 
{ + if let Some(sp) = &shared { + sp.archive_done(); + } + return Ok(()); + } + + warn!(target: "reth::cli", file = %archive.file_name, component = %planned.component, attempt, "Extracted files failed integrity checks, retrying"); + } + + eyre::bail!( + "Failed integrity validation after {} attempts for {}", + MAX_DOWNLOAD_RETRIES, + archive.file_name + ) +} + +fn verify_output_files(target_dir: &Path, output_files: &[OutputFileChecksum]) -> Result { + if output_files.is_empty() { + return Ok(false); + } + + for expected in output_files { + let output_path = target_dir.join(&expected.path); + let meta = match fs::metadata(&output_path) { + Ok(meta) => meta, + Err(_) => return Ok(false), + }; + if meta.len() != expected.size { + return Ok(false); + } + + let actual = file_blake3_hex(&output_path)?; + if !actual.eq_ignore_ascii_case(&expected.blake3) { + return Ok(false); + } + } + + Ok(true) +} + +fn cleanup_output_files(target_dir: &Path, output_files: &[OutputFileChecksum]) { + for output in output_files { + let _ = fs::remove_file(target_dir.join(&output.path)); + } +} + +fn file_blake3_hex(path: &Path) -> Result { + let mut file = fs::open(path)?; + let mut hasher = Hasher::new(); + let mut buf = [0_u8; 64 * 1024]; + + loop { + let n = file.read(&mut buf)?; + if n == 0 { + break; + } + hasher.update(&buf[..n]); + } + + Ok(hasher.finalize().to_hex().to_string()) +} + +/// Builds the base URL for the given chain ID using configured defaults. +fn get_base_url(chain_id: u64) -> String { + let defaults = DownloadDefaults::get_global(); + match &defaults.default_chain_aware_base_url { + Some(url) => format!("{url}/{chain_id}"), + None => defaults.default_base_url.to_string(), + } +} + +async fn fetch_manifest_from_source(source: &str) -> Result { + if let Ok(parsed) = Url::parse(source) { + return match parsed.scheme() { + "http" | "https" => { + Ok(Client::new().get(source).send().await?.error_for_status()?.json().await?) 
+ } + "file" => { + let path = parsed + .to_file_path() + .map_err(|_| eyre::eyre!("Invalid file:// manifest path: {source}"))?; + let content = fs::read_to_string(path)?; + Ok(serde_json::from_str(&content)?) + } + _ => Err(eyre::eyre!("Unsupported manifest URL scheme: {}", parsed.scheme())), + }; + } + + let content = fs::read_to_string(source)?; + Ok(serde_json::from_str(&content)?) +} + +fn resolve_manifest_base_url(manifest: &SnapshotManifest, source: &str) -> Result { + if let Some(base_url) = manifest.base_url.as_deref() && + !base_url.is_empty() + { + return Ok(base_url.trim_end_matches('/').to_string()); + } + + if let Ok(mut url) = Url::parse(source) { + if url.scheme() == "file" { + let mut path = url + .to_file_path() + .map_err(|_| eyre::eyre!("Invalid file:// manifest path: {source}"))?; + path.pop(); + let mut base = Url::from_directory_path(path) + .map_err(|_| eyre::eyre!("Invalid manifest directory for source: {source}"))? + .to_string(); + if base.ends_with('/') { + base.pop(); + } + return Ok(base); + } + + { + let mut segments = url + .path_segments_mut() + .map_err(|_| eyre::eyre!("manifest_url must have a hierarchical path"))?; + segments.pop_if_empty(); + segments.pop(); + } + return Ok(url.as_str().trim_end_matches('/').to_string()); + } + + let path = Path::new(source); + let manifest_dir = if path.is_absolute() { + path.parent().map(Path::to_path_buf).unwrap_or_else(|| PathBuf::from(".")) + } else { + let joined = std::env::current_dir()?.join(path); + joined.parent().map(Path::to_path_buf).unwrap_or_else(|| PathBuf::from(".")) + }; + let mut base = Url::from_directory_path(&manifest_dir) + .map_err(|_| eyre::eyre!("Invalid manifest directory: {}", manifest_dir.display()))? + .to_string(); + if base.ends_with('/') { + base.pop(); + } + Ok(base) +} + +/// Builds default URL for latest mainnet archive snapshot using configured defaults. +/// +/// Used by the legacy single-archive download flow when no manifest is available. 
#[allow(dead_code)]
async fn get_latest_snapshot_url(chain_id: u64) -> Result<String> {
    let base_url = get_base_url(chain_id);
    let latest_url = format!("{base_url}/latest.txt");
    // `latest.txt` contains the file name of the most recent archive.
    let filename = Client::new()
        .get(latest_url)
        .send()
        .await?
        .error_for_status()?
        .text()
        .await?
        .trim()
        .to_string();

    // Fix: build the URL from the fetched file name; previously the fetched
    // name was discarded and a literal placeholder was returned.
    Ok(format!("{base_url}/{filename}"))
}

#[cfg(test)]
mod tests {
    use super::*;
    use manifest::{ComponentManifest, SingleArchive};
    use tempfile::tempdir;

    /// Manifest containing only the hidden archive-only components.
    fn manifest_with_archive_only_components() -> SnapshotManifest {
        let mut components = BTreeMap::new();
        components.insert(
            SnapshotComponentType::TransactionSenders.key().to_string(),
            ComponentManifest::Single(SingleArchive {
                file: "transaction_senders.tar.zst".to_string(),
                size: 1,
                blake3: None,
                output_files: vec![],
            }),
        );
        components.insert(
            SnapshotComponentType::RocksdbIndices.key().to_string(),
            ComponentManifest::Single(SingleArchive {
                file: "rocksdb_indices.tar.zst".to_string(),
                size: 1,
                blake3: None,
                output_files: vec![],
            }),
        );
        SnapshotManifest {
            block: 0,
            chain_id: 1,
            storage_version: 2,
            timestamp: 0,
            base_url: Some("https://example.com".to_string()),
            components,
        }
    }

    #[test]
    fn test_download_defaults_builder() {
        let defaults = DownloadDefaults::default()
            .with_snapshot("https://example.com/snapshots (example)")
            .with_base_url("https://example.com");

        assert_eq!(defaults.default_base_url, "https://example.com");
        assert_eq!(defaults.available_snapshots.len(), 3); // 2 defaults + 1 added
    }

    #[test]
    fn test_download_defaults_replace_snapshots() {
        let defaults = DownloadDefaults::default().with_snapshots(vec![
            Cow::Borrowed("https://custom1.com"),
            Cow::Borrowed("https://custom2.com"),
        ]);

        assert_eq!(defaults.available_snapshots.len(), 2);
        assert_eq!(defaults.available_snapshots[0], "https://custom1.com");
    }

    #[test]
    fn test_long_help_generation() {
        let defaults = DownloadDefaults::default();
        let help = defaults.long_help();

        assert!(help.contains("Available snapshot sources:"));
        assert!(help.contains("merkle.io"));
        assert!(help.contains("publicnode.com"));
        assert!(help.contains("file://"));
    }

    #[test]
    fn test_long_help_override() {
        let custom_help = "This is custom help text for downloading snapshots.";
        let defaults = DownloadDefaults::default().with_long_help(custom_help);

        let help = defaults.long_help();
        assert_eq!(help, custom_help);
        assert!(!help.contains("Available snapshot sources:"));
    }

    #[test]
    fn test_builder_chaining() {
        let defaults = DownloadDefaults::default()
            .with_base_url("https://custom.example.com")
            .with_snapshot("https://snapshot1.com")
            .with_snapshot("https://snapshot2.com")
            .with_long_help("Custom help for snapshots");

        assert_eq!(defaults.default_base_url, "https://custom.example.com");
        assert_eq!(defaults.available_snapshots.len(), 4); // 2 defaults + 2 added
        assert_eq!(defaults.long_help, Some("Custom help for snapshots".to_string()));
    }

    #[test]
    fn test_compression_format_detection() {
        assert!(matches!(
            CompressionFormat::from_url("https://example.com/snapshot.tar.lz4"),
            Ok(CompressionFormat::Lz4)
        ));
        assert!(matches!(
            CompressionFormat::from_url("https://example.com/snapshot.tar.zst"),
            Ok(CompressionFormat::Zstd)
        ));
        assert!(matches!(
            CompressionFormat::from_url("file:///path/to/snapshot.tar.lz4"),
            Ok(CompressionFormat::Lz4)
        ));
        assert!(matches!(
            CompressionFormat::from_url("file:///path/to/snapshot.tar.zst"),
            Ok(CompressionFormat::Zstd)
        ));
        assert!(CompressionFormat::from_url("https://example.com/snapshot.tar.gz").is_err());
    }

    #[test]
    fn inject_archive_only_components_for_archive_selection() {
        let manifest = manifest_with_archive_only_components();
        let mut selections = BTreeMap::new();
        selections.insert(SnapshotComponentType::Transactions, ComponentSelection::All);
        selections.insert(SnapshotComponentType::Receipts, ComponentSelection::All);
        selections.insert(SnapshotComponentType::AccountChangesets, ComponentSelection::All);
        selections.insert(SnapshotComponentType::StorageChangesets, ComponentSelection::All);

        inject_archive_only_components(&mut selections, &manifest, true);

        assert_eq!(
            selections.get(&SnapshotComponentType::TransactionSenders),
            Some(&ComponentSelection::All)
        );
        assert_eq!(
            selections.get(&SnapshotComponentType::RocksdbIndices),
            Some(&ComponentSelection::All)
        );
    }

    #[test]
    fn inject_archive_only_components_without_rocksdb() {
        let manifest = manifest_with_archive_only_components();
        let mut selections = BTreeMap::new();
        selections.insert(SnapshotComponentType::Transactions, ComponentSelection::All);
        selections.insert(SnapshotComponentType::Receipts, ComponentSelection::All);
        selections.insert(SnapshotComponentType::AccountChangesets, ComponentSelection::All);
        selections.insert(SnapshotComponentType::StorageChangesets, ComponentSelection::All);

        inject_archive_only_components(&mut selections, &manifest, false);

        assert_eq!(
            selections.get(&SnapshotComponentType::TransactionSenders),
            Some(&ComponentSelection::All)
        );
        assert_eq!(selections.get(&SnapshotComponentType::RocksdbIndices), None);
    }

    #[test]
    fn should_reset_index_stage_checkpoints_without_rocksdb_indices() {
        let mut selections = BTreeMap::new();
        selections.insert(SnapshotComponentType::Transactions, ComponentSelection::All);
        assert!(should_reset_index_stage_checkpoints(&selections));

        selections.insert(SnapshotComponentType::RocksdbIndices, ComponentSelection::All);
        assert!(!should_reset_index_stage_checkpoints(&selections));
    }

    #[test]
    fn summarize_download_startup_counts_reusable_and_needs_download() {
        let dir = tempdir().unwrap();
        let target_dir = dir.path();
        let ok_file = target_dir.join("ok.bin");
        std::fs::write(&ok_file, vec![1_u8; 4]).unwrap();
        let ok_hash
= file_blake3_hex(&ok_file).unwrap(); + + let planned = vec![ + PlannedArchive { + ty: SnapshotComponentType::State, + component: "State".to_string(), + archive: ArchiveDescriptor { + url: "https://example.com/ok.tar.zst".to_string(), + file_name: "ok.tar.zst".to_string(), + size: 10, + blake3: None, + output_files: vec![OutputFileChecksum { + path: "ok.bin".to_string(), + size: 4, + blake3: ok_hash, + }], + }, + }, + PlannedArchive { + ty: SnapshotComponentType::Headers, + component: "Headers".to_string(), + archive: ArchiveDescriptor { + url: "https://example.com/missing.tar.zst".to_string(), + file_name: "missing.tar.zst".to_string(), + size: 10, + blake3: None, + output_files: vec![OutputFileChecksum { + path: "missing.bin".to_string(), + size: 1, + blake3: "deadbeef".to_string(), + }], + }, + }, + PlannedArchive { + ty: SnapshotComponentType::Transactions, + component: "Transactions".to_string(), + archive: ArchiveDescriptor { + url: "https://example.com/bad-size.tar.zst".to_string(), + file_name: "bad-size.tar.zst".to_string(), + size: 10, + blake3: None, + output_files: vec![], + }, + }, + ]; + + let summary = summarize_download_startup(&planned, target_dir).unwrap(); + assert_eq!(summary.reusable, 1); + assert_eq!(summary.needs_download, 2); + } + + #[test] + fn archive_priority_prefers_state_then_rocksdb() { + let mut planned = [ + PlannedArchive { + ty: SnapshotComponentType::Transactions, + component: "Transactions".to_string(), + archive: ArchiveDescriptor { + url: "u3".to_string(), + file_name: "t.tar.zst".to_string(), + size: 1, + blake3: None, + output_files: vec![OutputFileChecksum { + path: "a".to_string(), + size: 1, + blake3: "x".to_string(), + }], + }, + }, + PlannedArchive { + ty: SnapshotComponentType::RocksdbIndices, + component: "RocksDB Indices".to_string(), + archive: ArchiveDescriptor { + url: "u2".to_string(), + file_name: "rocksdb_indices.tar.zst".to_string(), + size: 1, + blake3: None, + output_files: vec![OutputFileChecksum { + path: 
"b".to_string(), + size: 1, + blake3: "y".to_string(), + }], + }, + }, + PlannedArchive { + ty: SnapshotComponentType::State, + component: "State (mdbx)".to_string(), + archive: ArchiveDescriptor { + url: "u1".to_string(), + file_name: "state.tar.zst".to_string(), + size: 1, + blake3: None, + output_files: vec![OutputFileChecksum { + path: "c".to_string(), + size: 1, + blake3: "z".to_string(), + }], + }, + }, + ]; + + planned.sort_by(|a, b| { + archive_priority_rank(a.ty) + .cmp(&archive_priority_rank(b.ty)) + .then_with(|| a.component.cmp(&b.component)) + .then_with(|| a.archive.file_name.cmp(&b.archive.file_name)) + }); + + assert_eq!(planned[0].ty, SnapshotComponentType::State); + assert_eq!(planned[1].ty, SnapshotComponentType::RocksdbIndices); + assert_eq!(planned[2].ty, SnapshotComponentType::Transactions); + } +} diff --git a/crates/cli/commands/src/download/tui.rs b/crates/cli/commands/src/download/tui.rs new file mode 100644 index 0000000000..5663e7a136 --- /dev/null +++ b/crates/cli/commands/src/download/tui.rs @@ -0,0 +1,437 @@ +use crate::download::{ + manifest::{ComponentSelection, SnapshotComponentType, SnapshotManifest}, + DownloadProgress, SelectionPreset, +}; +use crossterm::{ + event::{self, Event, KeyCode}, + execute, + terminal::{disable_raw_mode, enable_raw_mode, EnterAlternateScreen, LeaveAlternateScreen}, +}; +use ratatui::{ + backend::CrosstermBackend, + layout::{Constraint, Direction, Layout}, + style::{Color, Modifier, Style}, + text::{Line, Span}, + widgets::{Block, Borders, List, ListItem, ListState, Paragraph}, + Frame, Terminal, +}; +use std::{ + collections::BTreeMap, + io, + time::{Duration, Instant}, +}; + +/// Result of the interactive component selector. +pub struct SelectorOutput { + /// User-confirmed selections with per-component ranges. + pub selections: BTreeMap, + /// Last preset action used in the TUI, if any. + pub preset: Option, +} + +/// All distance presets. Groups filter this to only valid options. 
+const DISTANCE_PRESETS: [ComponentSelection; 6] = [ + ComponentSelection::None, + ComponentSelection::Distance(64), + ComponentSelection::Distance(10_064), + ComponentSelection::Distance(100_000), + ComponentSelection::Distance(1_000_000), + ComponentSelection::All, +]; + +/// Presets for components that require at least 64 blocks (receipts). +const RECEIPTS_PRESETS: [ComponentSelection; 5] = [ + ComponentSelection::Distance(64), + ComponentSelection::Distance(10_064), + ComponentSelection::Distance(100_000), + ComponentSelection::Distance(1_000_000), + ComponentSelection::All, +]; + +/// Presets for components that require at least 10064 blocks (account/storage history). +const HISTORY_PRESETS: [ComponentSelection; 4] = [ + ComponentSelection::Distance(10_064), + ComponentSelection::Distance(100_000), + ComponentSelection::Distance(1_000_000), + ComponentSelection::All, +]; + +/// A display group bundles one or more component types into a single TUI row. +struct DisplayGroup { + /// Display name shown in the TUI. + name: &'static str, + /// Underlying component types this group controls. + types: Vec<SnapshotComponentType>, + /// Whether this group is required and locked to All. + required: bool, + /// Valid presets for this group. Components with minimum distance requirements + /// exclude presets that would produce invalid prune configs. + presets: &'static [ComponentSelection], +} + +/// Build the display groups from available components in the manifest.
+fn build_groups(manifest: &SnapshotManifest) -> Vec<DisplayGroup> { + let has = |ty: SnapshotComponentType| manifest.component(ty).is_some(); + + let mut groups = Vec::new(); + + if has(SnapshotComponentType::State) { + groups.push(DisplayGroup { + name: "State (mdbx)", + types: vec![SnapshotComponentType::State], + required: true, + presets: &DISTANCE_PRESETS, + }); + } + + if has(SnapshotComponentType::Headers) { + groups.push(DisplayGroup { + name: "Headers", + types: vec![SnapshotComponentType::Headers], + required: true, + presets: &DISTANCE_PRESETS, + }); + } + + if has(SnapshotComponentType::Transactions) { + groups.push(DisplayGroup { + name: "Transactions", + types: vec![SnapshotComponentType::Transactions], + required: false, + presets: &HISTORY_PRESETS, + }); + } + + if has(SnapshotComponentType::Receipts) { + groups.push(DisplayGroup { + name: "Receipts", + types: vec![SnapshotComponentType::Receipts], + required: false, + presets: &RECEIPTS_PRESETS, + }); + } + + // Bundle account + storage changesets as "State History" + let has_acc = has(SnapshotComponentType::AccountChangesets); + let has_stor = has(SnapshotComponentType::StorageChangesets); + if has_acc || has_stor { + let mut types = Vec::new(); + if has_acc { + types.push(SnapshotComponentType::AccountChangesets); + } + if has_stor { + types.push(SnapshotComponentType::StorageChangesets); + } + groups.push(DisplayGroup { + name: "State History", + types, + required: false, + presets: &HISTORY_PRESETS, + }); + } + + groups +} + +struct SelectorApp { + manifest: SnapshotManifest, + full_preset: BTreeMap<SnapshotComponentType, ComponentSelection>, + /// Display groups shown in the TUI. + groups: Vec<DisplayGroup>, + /// Current selection for each group. + selections: Vec<ComponentSelection>, + /// Last preset action invoked by user. + preset: Option<SelectionPreset>, + /// Current cursor position. + cursor: usize, + /// List state for ratatui.
+ list_state: ListState, +} + +impl SelectorApp { + fn new( + manifest: SnapshotManifest, + full_preset: BTreeMap<SnapshotComponentType, ComponentSelection>, + ) -> Self { + let groups = build_groups(&manifest); + + // Default to the minimal preset (matches --minimal prune config) + let selections = groups.iter().map(|g| g.types[0].minimal_selection()).collect(); + + let mut list_state = ListState::default(); + list_state.select(Some(0)); + + Self { + manifest, + full_preset, + groups, + selections, + preset: Some(SelectionPreset::Minimal), + cursor: 0, + list_state, + } + } + + fn cycle_right(&mut self) { + if let Some(group) = self.groups.get(self.cursor) { + if group.required { + return; + } + let presets = group.presets; + let current = self.selections[self.cursor]; + let idx = presets.iter().position(|p| *p == current).unwrap_or(0); + self.selections[self.cursor] = presets[(idx + 1) % presets.len()]; + self.preset = None; + } + } + + fn cycle_left(&mut self) { + if let Some(group) = self.groups.get(self.cursor) { + if group.required { + return; + } + let presets = group.presets; + let current = self.selections[self.cursor]; + let idx = presets.iter().position(|p| *p == current).unwrap_or(0); + self.selections[self.cursor] = presets[(idx + presets.len() - 1) % presets.len()]; + self.preset = None; + } + } + + fn select_all(&mut self) { + for sel in &mut self.selections { + *sel = ComponentSelection::All; + } + self.preset = Some(SelectionPreset::Archive); + } + + fn select_minimal(&mut self) { + for (i, group) in self.groups.iter().enumerate() { + self.selections[i] = group.types[0].minimal_selection(); + } + self.preset = Some(SelectionPreset::Minimal); + } + + fn select_full(&mut self) { + for (i, group) in self.groups.iter().enumerate() { + let mut selection = group.types[0].minimal_selection(); + for ty in &group.types { + if let Some(sel) = self.full_preset.get(ty).copied() { + selection = sel; + break; + } + } + self.selections[i] = selection; + } + self.preset = Some(SelectionPreset::Full); + } + 
+ fn move_up(&mut self) { + if self.cursor > 0 { + self.cursor -= 1; + } else { + self.cursor = self.groups.len().saturating_sub(1); + } + self.list_state.select(Some(self.cursor)); + } + + fn move_down(&mut self) { + if self.cursor < self.groups.len() - 1 { + self.cursor += 1; + } else { + self.cursor = 0; + } + self.list_state.select(Some(self.cursor)); + } + + /// Build the flat component→selection map from grouped selections. + fn selection_map(&self) -> BTreeMap<SnapshotComponentType, ComponentSelection> { + let mut map = BTreeMap::new(); + for (group, sel) in self.groups.iter().zip(&self.selections) { + for ty in &group.types { + map.insert(*ty, *sel); + } + } + map + } + + /// Size for a single group, summing all component types in the group. + fn group_size(&self, group_idx: usize) -> u64 { + let sel = self.selections[group_idx]; + let distance = match sel { + ComponentSelection::None => return 0, + ComponentSelection::All => None, + ComponentSelection::Distance(d) => Some(d), + }; + self.groups[group_idx] + .types + .iter() + .map(|ty| self.manifest.size_for_distance(*ty, distance)) + .sum() + } + + fn total_selected_size(&self) -> u64 { + (0..self.groups.len()).map(|i| self.group_size(i)).sum() + } +} + +/// Runs the interactive component selector TUI.
+pub fn run_selector( + manifest: SnapshotManifest, + full_preset: &BTreeMap<SnapshotComponentType, ComponentSelection>, +) -> eyre::Result<SelectorOutput> { + enable_raw_mode()?; + let mut stdout = io::stdout(); + execute!(stdout, EnterAlternateScreen)?; + let backend = CrosstermBackend::new(stdout); + let mut terminal = Terminal::new(backend)?; + + let mut app = SelectorApp::new(manifest, full_preset.clone()); + let result = event_loop(&mut terminal, &mut app); + + disable_raw_mode()?; + execute!(terminal.backend_mut(), LeaveAlternateScreen)?; + terminal.show_cursor()?; + + result +} + +fn event_loop( + terminal: &mut Terminal<CrosstermBackend<io::Stdout>>, + app: &mut SelectorApp, +) -> eyre::Result<SelectorOutput> { + let tick_rate = Duration::from_millis(100); + let mut last_tick = Instant::now(); + + loop { + terminal.draw(|f| render(f, app))?; + + let timeout = + tick_rate.checked_sub(last_tick.elapsed()).unwrap_or_else(|| Duration::from_secs(0)); + + if crossterm::event::poll(timeout)? && + let Event::Key(key) = event::read()? && + key.kind == event::KeyEventKind::Press + { + match key.code { + KeyCode::Char('q') | KeyCode::Esc => { + eyre::bail!("Download cancelled by user"); + } + KeyCode::Enter => { + return Ok(SelectorOutput { + selections: app.selection_map(), + preset: app.preset, + }); + } + KeyCode::Right | KeyCode::Char('l') | KeyCode::Char(' ') => app.cycle_right(), + KeyCode::Left | KeyCode::Char('h') => app.cycle_left(), + KeyCode::Char('a') => app.select_all(), + KeyCode::Char('f') => app.select_full(), + KeyCode::Char('m') => app.select_minimal(), + KeyCode::Up | KeyCode::Char('k') => app.move_up(), + KeyCode::Down | KeyCode::Char('j') => app.move_down(), + _ => {} + } + } + + if last_tick.elapsed() >= tick_rate { + last_tick = Instant::now(); + } + } +} + +fn format_selection(sel: &ComponentSelection) -> String { + match sel { + ComponentSelection::All => "All".to_string(), + ComponentSelection::Distance(d) => format!("Last {d} blocks"), + ComponentSelection::None => "None".to_string(), + } +} + +fn render(f: &mut Frame<'_>, app: &mut 
SelectorApp) { + let chunks = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(3), // Header + Constraint::Min(8), // Component list + Constraint::Length(3), // Footer + ]) + .split(f.area()); + + // Header + let block_info = if app.manifest.block > 0 { + format!(" (block {})", app.manifest.block) + } else { + String::new() + }; + let header = Paragraph::new(format!(" Select snapshot components to download{}", block_info)) + .style(Style::default().fg(Color::Cyan).add_modifier(Modifier::BOLD)) + .block(Block::default().borders(Borders::ALL).title("reth download")); + f.render_widget(header, chunks[0]); + + // Component list + let items: Vec<ListItem<'_>> = app + .groups + .iter() + .enumerate() + .map(|(i, group)| { + let sel = &app.selections[i]; + let sel_str = format_selection(sel); + + let size = app.group_size(i); + let size_str = + if size > 0 { DownloadProgress::format_size(size) } else { String::new() }; + + let required = if group.required { " (required)" } else { "" }; + + let at_max = *sel == *group.presets.last().unwrap_or(&ComponentSelection::All); + let at_min = *sel == group.presets[0]; + let arrows = if group.required { + " " + } else if at_max { + "◂ " + } else if at_min { + " ▸" + } else { + "◂ ▸" + }; + + let style = if group.required { + Style::default().fg(Color::DarkGray) + } else if matches!(sel, ComponentSelection::None) { + Style::default().fg(Color::White) + } else { + Style::default().fg(Color::Green) + }; + + ListItem::new(Line::from(vec![ + Span::styled(format!(" {:<22}", group.name), style), + Span::styled( + format!("{arrows} {:<12}", sel_str), + style.add_modifier(Modifier::BOLD), + ), + Span::styled(format!("{:>10}", size_str), style.add_modifier(Modifier::DIM)), + Span::styled(required.to_string(), Style::default().fg(Color::DarkGray)), + ])) + }) + .collect(); + + let total_str = DownloadProgress::format_size(app.total_selected_size()); + let list = List::new(items) + .block( + Block::default() + 
.borders(Borders::ALL) + .title(format!("Components — Total: {total_str}")), + ) + .highlight_style(Style::default().add_modifier(Modifier::BOLD).bg(Color::DarkGray)) + .highlight_symbol("▸ "); + f.render_stateful_widget(list, chunks[1], &mut app.list_state); + + // Footer + let footer = Paragraph::new( + " [←/→] adjust [m] minimal [f] full [a] archive [Enter] confirm [Esc] cancel", + ) + .style(Style::default().fg(Color::Cyan).add_modifier(Modifier::BOLD)) + .block(Block::default().borders(Borders::ALL)); + f.render_widget(footer, chunks[2]); +} diff --git a/crates/ethereum/cli/src/app.rs b/crates/ethereum/cli/src/app.rs index 076b7965e0..5858689d1e 100644 --- a/crates/ethereum/cli/src/app.rs +++ b/crates/ethereum/cli/src/app.rs @@ -194,6 +194,7 @@ where runner.run_blocking_command_until_exit(|ctx| command.execute::(ctx)) } Commands::Download(command) => runner.run_blocking_until_ctrl_c(command.execute::()), + Commands::SnapshotManifest(command) => command.execute(), Commands::Stage(command) => { runner.run_command_until_exit(|ctx| command.execute::(ctx, components)) } diff --git a/crates/ethereum/cli/src/interface.rs b/crates/ethereum/cli/src/interface.rs index c064e8c190..5a5469c2db 100644 --- a/crates/ethereum/cli/src/interface.rs +++ b/crates/ethereum/cli/src/interface.rs @@ -6,7 +6,9 @@ use reth_chainspec::{ChainSpec, Hardforks}; use reth_cli::chainspec::ChainSpecParser; use reth_cli_commands::{ common::{CliComponentsBuilder, CliNodeTypes, HeaderMut}, - config_cmd, db, download, dump_genesis, export_era, import, import_era, init_cmd, init_state, + config_cmd, db, download, + download::manifest_cmd, + dump_genesis, export_era, import, import_era, init_cmd, init_state, launcher::FnLauncher, node::{self, NoArgs}, p2p, prune, re_execute, stage, @@ -281,6 +283,9 @@ pub enum Commands< /// Download public node snapshots #[command(name = "download")] Download(download::DownloadCommand), + /// Generate a snapshot manifest from local archive files. 
+ #[command(name = "snapshot-manifest")] + SnapshotManifest(manifest_cmd::SnapshotManifestCommand), /// Manipulate individual stages. #[command(name = "stage")] Stage(stage::Command), @@ -333,6 +338,7 @@ impl cmd.chain_spec(), Self::Db(cmd) => cmd.chain_spec(), Self::Download(cmd) => cmd.chain_spec(), + Self::SnapshotManifest(_) => None, Self::Stage(cmd) => cmd.chain_spec(), Self::P2P(cmd) => cmd.chain_spec(), #[cfg(feature = "dev")] diff --git a/docs/vocs/docs/pages/cli/SUMMARY.mdx b/docs/vocs/docs/pages/cli/SUMMARY.mdx index dbf450974e..75ff441414 100644 --- a/docs/vocs/docs/pages/cli/SUMMARY.mdx +++ b/docs/vocs/docs/pages/cli/SUMMARY.mdx @@ -41,6 +41,7 @@ - [`reth db account-storage`](./reth/db/account-storage.mdx) - [`reth db state`](./reth/db/state.mdx) - [`reth download`](./reth/download.mdx) + - [`reth snapshot-manifest`](./reth/snapshot-manifest.mdx) - [`reth stage`](./reth/stage.mdx) - [`reth stage run`](./reth/stage/run.mdx) - [`reth stage drop`](./reth/stage/drop.mdx) diff --git a/docs/vocs/docs/pages/cli/reth.mdx b/docs/vocs/docs/pages/cli/reth.mdx index 579748dc8d..66b7b09c93 100644 --- a/docs/vocs/docs/pages/cli/reth.mdx +++ b/docs/vocs/docs/pages/cli/reth.mdx @@ -9,21 +9,22 @@ $ reth --help Usage: reth [OPTIONS] Commands: - node Start the node - init Initialize the database from a genesis file - init-state Initialize the database from a state dump file - import This syncs RLP encoded blocks from a file or files - import-era This syncs ERA encoded blocks from a directory - export-era Exports block to era1 files in a specified directory - dump-genesis Dumps genesis block JSON configuration to stdout - db Database debugging utilities - download Download public node snapshots - stage Manipulate individual stages - p2p P2P Debugging utilities - config Write config to stdout - prune Prune according to the configuration without any limits - re-execute Re-execute blocks in parallel to verify historical sync correctness - help Print this message or the help 
of the given subcommand(s) + node Start the node + init Initialize the database from a genesis file + init-state Initialize the database from a state dump file + import This syncs RLP encoded blocks from a file or files + import-era This syncs ERA encoded blocks from a directory + export-era Exports block to era1 files in a specified directory + dump-genesis Dumps genesis block JSON configuration to stdout + db Database debugging utilities + download Download public node snapshots + snapshot-manifest Generate a snapshot manifest from local archive files + stage Manipulate individual stages + p2p P2P Debugging utilities + config Write config to stdout + prune Prune according to the configuration without any limits + re-execute Re-execute blocks in parallel to verify historical sync correctness + help Print this message or the help of the given subcommand(s) Options: -h, --help diff --git a/docs/vocs/docs/pages/cli/reth/download.mdx b/docs/vocs/docs/pages/cli/reth/download.mdx index b25b05984c..57425abb1e 100644 --- a/docs/vocs/docs/pages/cli/reth/download.mdx +++ b/docs/vocs/docs/pages/cli/reth/download.mdx @@ -133,6 +133,50 @@ Storage: Local file:// URLs are also supported for extracting snapshots from disk. + --manifest-url + URL to a snapshot manifest.json for modular component downloads. + + When provided, fetches this manifest instead of discovering it from the default base URL. Useful for testing with custom or local manifests. 
+ + --manifest-path + Local path to a snapshot manifest.json for modular component downloads + + --with-txs + Include transaction static files + + --with-receipts + Include receipt static files + + --with-state-history + Include account and storage history static files + + --archive + Download all available components (archive node, no pruning) + + --minimal + Download the minimal component set (same default as --non-interactive) + + --full + Download the full node component set (matches default full prune settings) + + --without-rocksdb + Skip optional RocksDB indices even when archive components are selected. + + This affects `--archive`/`--all` and TUI archive preset (`a`). + + -y, --non-interactive + Skip interactive component selection. Downloads the minimal set (state + headers + transactions + changesets) unless explicit --with-* flags narrow it + + --resumable + Use resumable two-phase downloads (download to disk first, then extract). + + Archives are downloaded to a .part file with HTTP Range resume support before extraction. Slower but tolerates network interruptions without restarting. By default, archives stream directly into the extractor. 
+ + --download-concurrency + Maximum number of concurrent modular archive workers + + [default: 8] + Logging: --log.stdout.format The format to use for logs written to stdout diff --git a/docs/vocs/docs/pages/cli/reth/snapshot-manifest.mdx b/docs/vocs/docs/pages/cli/reth/snapshot-manifest.mdx new file mode 100644 index 0000000000..3da7f9fa5a --- /dev/null +++ b/docs/vocs/docs/pages/cli/reth/snapshot-manifest.mdx @@ -0,0 +1,180 @@ +# reth snapshot-manifest + +Generate a snapshot manifest from local archive files + +```bash +$ reth snapshot-manifest --help +``` +```txt +Usage: reth snapshot-manifest [OPTIONS] --source-datadir --output-dir + +Options: + -d, --source-datadir + Source datadir containing static files + + --base-url + Optional base URL where archives will be hosted + + -o, --output-dir + Output directory where chunk archives and manifest.json are written + + --block + Block number this snapshot was taken at. + + If omitted, this is inferred from the source datadir's `Finish` stage checkpoint. + + --chain-id + Chain ID + + [default: 1] + + --blocks-per-file + Blocks per archive file for chunked components. + + If omitted, this is inferred from header static file ranges in the source datadir. + + -h, --help + Print help (see a summary with '-h') + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + [default: terminal] + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + Possible values: + - json: Represents JSON formatting for logs. 
This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + [default: terminal] + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.name + The prefix name of the log files + + [default: reth.log] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled. + + Default: 5 for `node` command, 0 for non-node utility subcommands. + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + Possible values: + - always: Colors on + - auto: Auto-detect + - never: Colors off + + [default: always] + + --logs-otlp[=] + Enable `Opentelemetry` logs export to an OTLP endpoint. + + If no value provided, defaults based on protocol: - HTTP: `http://localhost:4318/v1/logs` - gRPC: `http://localhost:4317` + + Example: --logs-otlp=http://collector:4318/v1/logs + + [env: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=] + + --logs-otlp.filter + Set a filter directive for the OTLP logs exporter. This controls the verbosity of logs sent to the OTLP endpoint. It follows the same syntax as the `RUST_LOG` environment variable. + + Example: --logs-otlp.filter=info,reth=debug + + Defaults to INFO if not specified. + + [default: info] + +Display: + -v, --verbosity... + Set the minimum log level. 
+ + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) + + -q, --quiet + Silence all log output + +Tracing: + --tracing-otlp[=] + Enable `Opentelemetry` tracing export to an OTLP endpoint. + + If no value provided, defaults based on protocol: - HTTP: `http://localhost:4318/v1/traces` - gRPC: `http://localhost:4317` + + Example: --tracing-otlp=http://collector:4318/v1/traces + + [env: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=] + + --tracing-otlp-protocol + OTLP transport protocol to use for exporting traces and logs. + + - `http`: expects endpoint path to end with `/v1/traces` or `/v1/logs` - `grpc`: expects endpoint without a path + + Defaults to HTTP if not specified. + + Possible values: + - http: HTTP/Protobuf transport, port 4318, requires `/v1/traces` path + - grpc: gRPC transport, port 4317 + + [env: OTEL_EXPORTER_OTLP_PROTOCOL=] + [default: http] + + --tracing-otlp.filter + Set a filter directive for the OTLP tracer. This controls the verbosity of spans and events sent to the OTLP endpoint. It follows the same syntax as the `RUST_LOG` environment variable. + + Example: --tracing-otlp.filter=info,reth=debug,hyper_util=off + + Defaults to TRACE if not specified. + + [default: debug] + + --tracing-otlp.sample-ratio + Trace sampling ratio to control the percentage of traces to export. + + Valid range: 0.0 to 1.0 - 1.0, default: Sample all traces - 0.01: Sample 1% of traces - 0.0: Disable sampling + + Example: --tracing-otlp.sample-ratio=0.0. 
+ + [env: OTEL_TRACES_SAMPLER_ARG=] +``` \ No newline at end of file diff --git a/docs/vocs/sidebar-cli-reth.ts b/docs/vocs/sidebar-cli-reth.ts index 1cb71f9ddb..0fa4790a55 100644 --- a/docs/vocs/sidebar-cli-reth.ts +++ b/docs/vocs/sidebar-cli-reth.ts @@ -200,6 +200,10 @@ export const rethCliSidebar: SidebarItem = { text: "reth download", link: "/cli/reth/download" }, + { + text: "reth snapshot-manifest", + link: "/cli/reth/snapshot-manifest" + }, { text: "reth stage", link: "/cli/reth/stage",