diff --git a/Cargo.lock b/Cargo.lock index 91348f1536..871d35cf46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5324,6 +5324,25 @@ dependencies = [ "hashbrown 0.15.3", ] +[[package]] +name = "lz4" +version = "1.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "lz4_flex" version = "0.11.3" @@ -6875,6 +6894,7 @@ version = "1.3.12" dependencies = [ "alloy-rpc-types", "aquamarine", + "backon", "clap", "eyre", "reth-chainspec", @@ -6910,6 +6930,8 @@ dependencies = [ "reth-tasks", "reth-tokio-util", "reth-transaction-pool", + "similar-asserts", + "tempfile", "tokio", "tracing", ] @@ -7058,6 +7080,7 @@ dependencies = [ "futures", "human_bytes", "itertools 0.14.0", + "lz4", "proptest", "proptest-arbitrary-interop", "ratatui", @@ -7108,6 +7131,7 @@ dependencies = [ "secp256k1", "serde", "serde_json", + "tar", "tokio", "tokio-stream", "toml", @@ -11463,6 +11487,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tar-no-std" version = "0.3.3" @@ -13176,6 +13211,16 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" +dependencies = [ + 
"libc", + "rustix 1.0.7", +] + [[package]] name = "yansi" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index d4a1c29ca5..73b0f5dec5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -530,6 +530,7 @@ humantime = "2.1" humantime-serde = "1.1" itertools = { version = "0.14", default-features = false } linked_hash_set = "0.1" +lz4 = "1.28.1" modular-bitfield = "0.11.2" notify = { version = "8.0.0", default-features = false, features = ["macos_fsevent"] } nybbles = { version = "0.3.0", default-features = false } @@ -550,6 +551,7 @@ strum = { version = "0.27", default-features = false } strum_macros = "0.27" syn = "2.0" thiserror = { version = "2.0.0", default-features = false } +tar = "0.4.44" tracing = { version = "0.1.0", default-features = false } tracing-appender = "0.2" url = { version = "2.3", default-features = false } diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index d8f1f93a8b..4d93ca5d73 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -59,8 +59,13 @@ tokio = { workspace = true, features = ["sync", "macros", "time", "rt-multi-thre # misc aquamarine.workspace = true -eyre.workspace = true clap = { workspace = true, features = ["derive", "env"] } +eyre.workspace = true + +[dev-dependencies] +backon.workspace = true +similar-asserts.workspace = true +tempfile.workspace = true [features] default = ["jemalloc", "reth-revm/portable"] diff --git a/book/SUMMARY.md b/book/SUMMARY.md index cff0dd9ef6..310eebb028 100644 --- a/book/SUMMARY.md +++ b/book/SUMMARY.md @@ -49,6 +49,7 @@ - [`reth db clear static-file`](./cli/reth/db/clear/static-file.md) - [`reth db version`](./cli/reth/db/version.md) - [`reth db path`](./cli/reth/db/path.md) + - [`reth download`](./cli/reth/download.md) - [`reth stage`](./cli/reth/stage.md) - [`reth stage run`](./cli/reth/stage/run.md) - [`reth stage drop`](./cli/reth/stage/drop.md) diff --git a/book/cli/SUMMARY.md b/book/cli/SUMMARY.md index 7373a3a6f4..aa62529859 100644 --- a/book/cli/SUMMARY.md +++ 
b/book/cli/SUMMARY.md @@ -19,6 +19,7 @@ - [`reth db clear static-file`](./reth/db/clear/static-file.md) - [`reth db version`](./reth/db/version.md) - [`reth db path`](./reth/db/path.md) + - [`reth download`](./reth/download.md) - [`reth stage`](./reth/stage.md) - [`reth stage run`](./reth/stage/run.md) - [`reth stage drop`](./reth/stage/drop.md) diff --git a/book/cli/reth.md b/book/cli/reth.md index 791a201a88..8225d71b3b 100644 --- a/book/cli/reth.md +++ b/book/cli/reth.md @@ -16,6 +16,7 @@ Commands: import-era This syncs ERA encoded blocks from a directory dump-genesis Dumps genesis block JSON configuration to stdout db Database debugging utilities + download Download public node snapshots stage Manipulate individual stages p2p P2P Debugging utilities config Write config to stdout diff --git a/book/cli/reth/download.md b/book/cli/reth/download.md new file mode 100644 index 0000000000..04a7228f21 --- /dev/null +++ b/book/cli/reth/download.md @@ -0,0 +1,156 @@ +# reth download + +Download public node snapshots + +```bash +$ reth download --help +``` +```txt +Usage: reth download [OPTIONS] + +Options: + -h, --help + Print help (see a summary with '-h') + +Datadir: + --datadir + The path to the data dir for all reth files and subdirectories. + + Defaults to the OS-specific data directory: + + - Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/` + - Windows: `{FOLDERID_RoamingAppData}/reth/` + - macOS: `$HOME/Library/Application Support/reth/` + + [default: default] + + --datadir.static-files + The absolute path to store static files in. + + --config + The path to the configuration file to use + + --chain + The chain this node is running. + Possible values are either a built-in chain or the path to a chain specification file. + + Built-in chains: + mainnet, sepolia, holesky, hoodi, dev + + [default: mainnet] + +Database: + --db.log-level + Database logging level. 
Levels higher than "notice" require a debug build + + Possible values: + - fatal: Enables logging for critical conditions, i.e. assertion failures + - error: Enables logging for error conditions + - warn: Enables logging for warning conditions + - notice: Enables logging for normal but significant condition + - verbose: Enables logging for verbose informational + - debug: Enables logging for debug-level messages + - trace: Enables logging for trace debug-level messages + - extra: Enables logging for extra debug-level messages + + --db.exclusive + Open environment in exclusive/monopolistic mode. Makes it possible to open a database on an NFS volume + + [possible values: true, false] + + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + + --db.read-transaction-timeout + Read transaction timeout in seconds, 0 means no timeout + + -u, --url + Specify a snapshot URL or let the command propose a default one. + + Available snapshot sources: + - https://downloads.merkle.io (default, mainnet archive) + - https://publicnode.com/snapshots (full nodes & testnets) + + If no URL is provided, the latest mainnet archive snapshot + will be proposed for download from merkle.io + +Logging: + --log.stdout.format + The format to use for logs written to stdout + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.stdout.filter + The filter to use for logs written to stdout + + [default: ] + + --log.file.format + The format to use for logs written to the log file + + [default: terminal] + + Possible values: + - json: Represents JSON formatting for logs. 
This format outputs log records as JSON objects, making it suitable for structured logging + - log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications + - terminal: Represents terminal-friendly formatting for logs + + --log.file.filter + The filter to use for logs written to the log file + + [default: debug] + + --log.file.directory + The path to put log files in + + [default: /logs] + + --log.file.max-size + The maximum size (in MB) of one log file + + [default: 200] + + --log.file.max-files + The maximum amount of log files that will be stored. If set to 0, background file logging is disabled + + [default: 5] + + --log.journald + Write logs to journald + + --log.journald.filter + The filter to use for logs written to journald + + [default: error] + + --color + Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting + + [default: always] + + Possible values: + - always: Colors on + - auto: Colors on + - never: Colors off + +Display: + -v, --verbosity... + Set the minimum log level. + + -v Errors + -vv Warnings + -vvv Info + -vvvv Debug + -vvvvv Traces (warning: very verbose!) 
+
+  -q, --quiet
+          Silence all log output
+```
\ No newline at end of file
diff --git a/crates/cli/commands/Cargo.toml b/crates/cli/commands/Cargo.toml
index 6fcd891d06..b8e4d39769 100644
--- a/crates/cli/commands/Cargo.toml
+++ b/crates/cli/commands/Cargo.toml
@@ -70,8 +70,10 @@ ahash.workspace = true
 human_bytes.workspace = true
 eyre.workspace = true
 clap = { workspace = true, features = ["derive", "env"] }
+lz4.workspace = true
 serde.workspace = true
 serde_json.workspace = true
+tar.workspace = true
 tracing.workspace = true
 backon.workspace = true
 secp256k1 = { workspace = true, features = ["global-context", "std", "recovery"] }
diff --git a/crates/cli/commands/src/download.rs b/crates/cli/commands/src/download.rs
new file mode 100644
index 0000000000..08c21d9eb8
--- /dev/null
+++ b/crates/cli/commands/src/download.rs
@@ -0,0 +1,250 @@
+use crate::common::EnvironmentArgs;
+use clap::Parser;
+use eyre::Result;
+use lz4::Decoder;
+use reqwest::Client;
+use reth_chainspec::{EthChainSpec, EthereumHardforks};
+use reth_cli::chainspec::ChainSpecParser;
+use reth_fs_util as fs;
+use std::{
+    io::{self, Read, Write},
+    path::Path,
+    sync::Arc,
+    time::{Duration, Instant},
+};
+use tar::Archive;
+use tokio::task;
+use tracing::info;
+
+/// Unit suffixes used by [`DownloadProgress::format_size`], in ascending powers of 1024.
+const BYTE_UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
+/// Base URL of the default snapshot provider.
+const MERKLE_BASE_URL: &str = "https://downloads.merkle.io";
+/// File extension a default snapshot archive is expected to have.
+const EXTENSION_TAR_FILE: &str = ".tar.lz4";
+
+/// `reth download` command
+#[derive(Debug, Parser)]
+pub struct DownloadCommand<C: ChainSpecParser> {
+    // Shared datadir / chain / database arguments.
+    #[command(flatten)]
+    env: EnvironmentArgs<C>,
+
+    // Snapshot URL; when absent, the latest snapshot listed by `MERKLE_BASE_URL` is used.
+    #[arg(
+        long,
+        short,
+        help = "Custom URL to download the snapshot from",
+        long_help = "Specify a snapshot URL or let the command propose a default one.\n\
+        \n\
+        Available snapshot sources:\n\
+        - https://downloads.merkle.io (default, mainnet archive)\n\
+        - https://publicnode.com/snapshots (full nodes & testnets)\n\
+        \n\
+        If no URL is provided, the latest mainnet archive snapshot\n\
+        will be proposed for download from merkle.io"
+    )]
+    url: Option<String>,
+}
+
+impl<C: ChainSpecParser<ChainSpec: EthChainSpec + EthereumHardforks>> DownloadCommand<C> {
+    /// Downloads a snapshot and unpacks it into the resolved data directory.
+    ///
+    /// Uses the `--url` argument when given; otherwise the latest snapshot advertised by the
+    /// default provider is looked up first.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the data directory cannot be created, the default snapshot URL
+    /// cannot be resolved, or the download or extraction fails.
+    pub async fn execute<N>(self) -> Result<()> {
+        let data_dir = self.env.datadir.resolve_datadir(self.env.chain.chain());
+        fs::create_dir_all(&data_dir)?;
+
+        let url = match self.url {
+            Some(url) => url,
+            None => {
+                let url = get_latest_snapshot_url().await?;
+                info!(target: "reth::cli", "Using default snapshot URL: {}", url);
+                url
+            }
+        };
+
+        info!(target: "reth::cli",
+            chain = %self.env.chain.chain(),
+            dir = ?data_dir.data_dir(),
+            url = %url,
+            "Starting snapshot download and extraction"
+        );
+
+        stream_and_extract(&url, data_dir.data_dir()).await?;
+        info!(target: "reth::cli", "Snapshot downloaded and extracted successfully");
+
+        Ok(())
+    }
+}
+
+impl<C: ChainSpecParser> DownloadCommand<C> {
+    /// Returns the underlying chain being used to run this command
+    pub fn chain_spec(&self) -> Option<&Arc<C::ChainSpec>> {
+        Some(&self.env.chain)
+    }
+}
+
+/// Tracks downloaded bytes and prints a progress line to stdout, throttled to avoid
+/// overwhelming the terminal.
+struct DownloadProgress {
+    /// Bytes read from the response so far.
+    downloaded: u64,
+    /// Expected size of the download in bytes.
+    total_size: u64,
+    /// When the progress line was last printed.
+    last_displayed: Instant,
+}
+
+impl DownloadProgress {
+    /// Minimum delay between two progress lines (at most 10 updates per second).
+    const DISPLAY_INTERVAL: Duration = Duration::from_millis(100);
+
+    /// Creates new progress tracker with given total size
+    fn new(total_size: u64) -> Self {
+        Self { downloaded: 0, total_size, last_displayed: Instant::now() }
+    }
+
+    /// Converts bytes to human readable format (B, KB, MB, GB, TB)
+    fn format_size(size: u64) -> String {
+        let mut size = size as f64;
+        let mut unit_index = 0;
+
+        while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 {
+            size /= 1024.0;
+            unit_index += 1;
+        }
+
+        format!("{:.2} {}", size, BYTE_UNITS[unit_index])
+    }
+
+    /// Returns the completed share in percent, or `0.0` when the total size is zero so an
+    /// empty response is not reported as `NaN%`.
+    fn percentage(&self) -> f64 {
+        if self.total_size == 0 {
+            return 0.0;
+        }
+        (self.downloaded as f64 / self.total_size as f64) * 100.0
+    }
+
+    /// Records `chunk_size` newly read bytes and redraws the progress line if
+    /// [`Self::DISPLAY_INTERVAL`] has elapsed since the last redraw.
+    fn update(&mut self, chunk_size: u64) -> io::Result<()> {
+        self.downloaded += chunk_size;
+
+        if self.last_displayed.elapsed() >= Self::DISPLAY_INTERVAL {
+            let formatted_downloaded = Self::format_size(self.downloaded);
+            let formatted_total = Self::format_size(self.total_size);
+            let progress = self.percentage();
+
+            // `write!` instead of `print!`: a closed stdout becomes an error, not a panic.
+            let mut stdout = io::stdout().lock();
+            write!(
+                stdout,
+                "\rDownloading and extracting... {progress:.2}% ({formatted_downloaded} / {formatted_total})",
+            )?;
+            stdout.flush()?;
+            self.last_displayed = Instant::now();
+        }
+
+        Ok(())
+    }
+}
+
+/// Adapter to track progress while reading
+struct ProgressReader<R> {
+    /// Wrapped byte source.
+    reader: R,
+    /// Progress state updated on every successful read.
+    progress: DownloadProgress,
+}
+
+impl<R: Read> ProgressReader<R> {
+    /// Wraps `reader`, expecting `total_size` bytes in total.
+    fn new(reader: R, total_size: u64) -> Self {
+        Self { reader, progress: DownloadProgress::new(total_size) }
+    }
+}
+
+impl<R: Read> Read for ProgressReader<R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let bytes = self.reader.read(buf)?;
+        if bytes > 0 {
+            self.progress.update(bytes as u64)?;
+        }
+        Ok(bytes)
+    }
+}
+
+/// Downloads and extracts a snapshot with blocking approach
+///
+/// The response body is streamed through the lz4 decoder straight into the tar unpacker.
+///
+/// # Errors
+///
+/// Returns an error if the request fails or returns an error status, the server sends no
+/// `Content-Length`, or decoding/unpacking fails.
+fn blocking_download_and_extract(url: &str, target_dir: &Path) -> Result<()> {
+    let client = reqwest::blocking::Client::builder().build()?;
+    let response = client.get(url).send()?.error_for_status()?;
+
+    let total_size = response.content_length().ok_or_else(|| {
+        eyre::eyre!(
+            "Server did not provide Content-Length header. This is required for snapshot downloads"
+        )
+    })?;
+
+    let progress_reader = ProgressReader::new(response, total_size);
+
+    let decoder = Decoder::new(progress_reader)?;
+    let mut archive = Archive::new(decoder);
+
+    archive.unpack(target_dir)?;
+
+    // The progress line is redrawn with `\r` and never ends in a newline; terminate it so
+    // the log message below starts on a fresh line.
+    writeln!(io::stdout())?;
+
+    info!(target: "reth::cli", "Extraction complete.");
+    Ok(())
+}
+
+/// Runs [`blocking_download_and_extract`] on the blocking thread pool so the async runtime
+/// is not stalled by the synchronous download.
+async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> {
+    let target_dir = target_dir.to_path_buf();
+    let url = url.to_string();
+    task::spawn_blocking(move || blocking_download_and_extract(&url, &target_dir)).await??;
+
+    Ok(())
+}
+
+/// Builds default URL for latest mainnet archive snapshot
+///
+/// # Errors
+///
+/// Returns an error if `latest.txt` cannot be fetched or does not name a `.tar.lz4` file.
+async fn get_latest_snapshot_url() -> Result<String> {
+    let latest_url = format!("{MERKLE_BASE_URL}/latest.txt");
+    let filename = Client::new()
+        .get(latest_url)
+        .send()
+        .await?
+        .error_for_status()?
+        .text()
+        .await?
+        .trim()
+        .to_string();
+
+    if !filename.ends_with(EXTENSION_TAR_FILE) {
+        return Err(eyre::eyre!("Unexpected snapshot filename format: {}", filename));
+    }
+
+    Ok(format!("{MERKLE_BASE_URL}/{filename}"))
+}
diff --git a/crates/cli/commands/src/lib.rs b/crates/cli/commands/src/lib.rs
index 24ba3f60a9..2789ad41bb 100644
--- a/crates/cli/commands/src/lib.rs
+++ b/crates/cli/commands/src/lib.rs
@@ -11,6 +11,7 @@
 pub mod common;
 pub mod config_cmd;
 pub mod db;
+pub mod download;
 pub mod dump_genesis;
 pub mod import;
 pub mod import_era;
diff --git a/crates/ethereum/cli/src/interface.rs b/crates/ethereum/cli/src/interface.rs
index 5f9474e401..ebfe1bbb66 100644
--- a/crates/ethereum/cli/src/interface.rs
+++ b/crates/ethereum/cli/src/interface.rs
@@ -5,7 +5,7 @@ use clap::{Parser, Subcommand};
 use reth_chainspec::ChainSpec;
 use reth_cli::chainspec::ChainSpecParser;
 use reth_cli_commands::{
-    config_cmd, db, dump_genesis, import, import_era, init_cmd, init_state,
+    config_cmd, db, download, dump_genesis, import, import_era, init_cmd, init_state,
     node::{self, NoArgs},
     p2p, prune, recover, stage,
 };
@@ -170,6 +170,9 @@ impl, Ext: clap::Args + fmt::Debug> Cl
             Commands::Db(command) => {
                 runner.run_blocking_until_ctrl_c(command.execute::())
             }
+            Commands::Download(command) => {
+                runner.run_blocking_until_ctrl_c(command.execute::())
+            }
             Commands::Stage(command) => runner.run_command_until_exit(|ctx| {
                 command.execute::(ctx, components)
             }),
@@ -223,6 +226,9 @@ pub enum Commands {
     /// Database debugging utilities
     #[command(name = "db")]
     Db(db::Command),
+    /// Download public node snapshots
+    #[command(name = "download")]
+    Download(download::DownloadCommand),
     /// Manipulate individual stages.
#[command(name = "stage")] Stage(stage::Command), @@ -258,6 +264,7 @@ impl Commands { Self::ImportEra(cmd) => cmd.chain_spec(), Self::DumpGenesis(cmd) => cmd.chain_spec(), Self::Db(cmd) => cmd.chain_spec(), + Self::Download(cmd) => cmd.chain_spec(), Self::Stage(cmd) => cmd.chain_spec(), Self::P2P(cmd) => cmd.chain_spec(), #[cfg(feature = "dev")] diff --git a/deny.toml b/deny.toml index 1cf27864a7..7c588e50fd 100644 --- a/deny.toml +++ b/deny.toml @@ -61,7 +61,6 @@ exceptions = [ # TODO: decide on MPL-2.0 handling # These dependencies are grandfathered in https://github.com/paradigmxyz/reth/pull/6980 { allow = ["MPL-2.0"], name = "option-ext" }, - { allow = ["MPL-2.0"], name = "webpki-roots" }, { allow = ["MPL-2.0"], name = "webpki-root-certs" }, ]