feat: add command download to download public node snapshots (#13598)

Co-authored-by: joshieDo <93316087+joshieDo@users.noreply.github.com>
Co-authored-by: Matthias Seitz <matthias.seitz@outlook.de>
This commit is contained in:
Léa Narzis
2025-05-07 11:28:48 +02:00
committed by GitHub
parent 5c64d59153
commit 178b0c1cb5
12 changed files with 422 additions and 3 deletions

45
Cargo.lock generated
View File

@@ -5324,6 +5324,25 @@ dependencies = [
"hashbrown 0.15.3",
]
[[package]]
name = "lz4"
version = "1.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4"
dependencies = [
"lz4-sys",
]
[[package]]
name = "lz4-sys"
version = "1.11.1+lz4-1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "lz4_flex"
version = "0.11.3"
@@ -6875,6 +6894,7 @@ version = "1.3.12"
dependencies = [
"alloy-rpc-types",
"aquamarine",
"backon",
"clap",
"eyre",
"reth-chainspec",
@@ -6910,6 +6930,8 @@ dependencies = [
"reth-tasks",
"reth-tokio-util",
"reth-transaction-pool",
"similar-asserts",
"tempfile",
"tokio",
"tracing",
]
@@ -7058,6 +7080,7 @@ dependencies = [
"futures",
"human_bytes",
"itertools 0.14.0",
"lz4",
"proptest",
"proptest-arbitrary-interop",
"ratatui",
@@ -7108,6 +7131,7 @@ dependencies = [
"secp256k1",
"serde",
"serde_json",
"tar",
"tokio",
"tokio-stream",
"toml",
@@ -11463,6 +11487,17 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tar"
version = "0.4.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a"
dependencies = [
"filetime",
"libc",
"xattr",
]
[[package]]
name = "tar-no-std"
version = "0.3.3"
@@ -13176,6 +13211,16 @@ dependencies = [
"tap",
]
[[package]]
name = "xattr"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e"
dependencies = [
"libc",
"rustix 1.0.7",
]
[[package]]
name = "yansi"
version = "1.0.1"

View File

@@ -530,6 +530,7 @@ humantime = "2.1"
humantime-serde = "1.1"
itertools = { version = "0.14", default-features = false }
linked_hash_set = "0.1"
lz4 = "1.28.1"
modular-bitfield = "0.11.2"
notify = { version = "8.0.0", default-features = false, features = ["macos_fsevent"] }
nybbles = { version = "0.3.0", default-features = false }
@@ -550,6 +551,7 @@ strum = { version = "0.27", default-features = false }
strum_macros = "0.27"
syn = "2.0"
thiserror = { version = "2.0.0", default-features = false }
tar = "0.4.44"
tracing = { version = "0.1.0", default-features = false }
tracing-appender = "0.2"
url = { version = "2.3", default-features = false }

View File

@@ -59,8 +59,13 @@ tokio = { workspace = true, features = ["sync", "macros", "time", "rt-multi-thre
# misc
aquamarine.workspace = true
eyre.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
eyre.workspace = true
[dev-dependencies]
backon.workspace = true
similar-asserts.workspace = true
tempfile.workspace = true
[features]
default = ["jemalloc", "reth-revm/portable"]

View File

@@ -49,6 +49,7 @@
- [`reth db clear static-file`](./cli/reth/db/clear/static-file.md)
- [`reth db version`](./cli/reth/db/version.md)
- [`reth db path`](./cli/reth/db/path.md)
- [`reth download`](./cli/reth/download.md)
- [`reth stage`](./cli/reth/stage.md)
- [`reth stage run`](./cli/reth/stage/run.md)
- [`reth stage drop`](./cli/reth/stage/drop.md)

1
book/cli/SUMMARY.md vendored
View File

@@ -19,6 +19,7 @@
- [`reth db clear static-file`](./reth/db/clear/static-file.md)
- [`reth db version`](./reth/db/version.md)
- [`reth db path`](./reth/db/path.md)
- [`reth download`](./reth/download.md)
- [`reth stage`](./reth/stage.md)
- [`reth stage run`](./reth/stage/run.md)
- [`reth stage drop`](./reth/stage/drop.md)

1
book/cli/reth.md vendored
View File

@@ -16,6 +16,7 @@ Commands:
import-era This syncs ERA encoded blocks from a directory
dump-genesis Dumps genesis block JSON configuration to stdout
db Database debugging utilities
download Download public node snapshots
stage Manipulate individual stages
p2p P2P Debugging utilities
config Write config to stdout

156
book/cli/reth/download.md vendored Normal file
View File

@@ -0,0 +1,156 @@
# reth download
Download public node snapshots
```bash
$ reth download --help
```
```txt
Usage: reth download [OPTIONS]
Options:
-h, --help
Print help (see a summary with '-h')
Datadir:
--datadir <DATA_DIR>
The path to the data dir for all reth files and subdirectories.
Defaults to the OS-specific data directory:
- Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/`
- Windows: `{FOLDERID_RoamingAppData}/reth/`
- macOS: `$HOME/Library/Application Support/reth/`
[default: default]
--datadir.static-files <PATH>
The absolute path to store static files in.
--config <FILE>
The path to the configuration file to use
--chain <CHAIN_OR_PATH>
The chain this node is running.
Possible values are either a built-in chain or the path to a chain specification file.
Built-in chains:
mainnet, sepolia, holesky, hoodi, dev
[default: mainnet]
Database:
--db.log-level <LOG_LEVEL>
Database logging level. Levels higher than "notice" require a debug build
Possible values:
- fatal: Enables logging for critical conditions, i.e. assertion failures
- error: Enables logging for error conditions
- warn: Enables logging for warning conditions
- notice: Enables logging for normal but significant condition
- verbose: Enables logging for verbose informational
- debug: Enables logging for debug-level messages
- trace: Enables logging for trace debug-level messages
- extra: Enables logging for extra debug-level messages
--db.exclusive <EXCLUSIVE>
Open environment in exclusive/monopolistic mode. Makes it possible to open a database on an NFS volume
[possible values: true, false]
--db.max-size <MAX_SIZE>
Maximum database size (e.g., 4TB, 8MB)
--db.growth-step <GROWTH_STEP>
Database growth step (e.g., 4GB, 4KB)
--db.read-transaction-timeout <READ_TRANSACTION_TIMEOUT>
Read transaction timeout in seconds, 0 means no timeout
-u, --url <URL>
Specify a snapshot URL or let the command propose a default one.
Available snapshot sources:
- https://downloads.merkle.io (default, mainnet archive)
- https://publicnode.com/snapshots (full nodes & testnets)
If no URL is provided, the latest mainnet archive snapshot
will be proposed for download from merkle.io
Logging:
--log.stdout.format <FORMAT>
The format to use for logs written to stdout
[default: terminal]
Possible values:
- json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging
- log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications
- terminal: Represents terminal-friendly formatting for logs
--log.stdout.filter <FILTER>
The filter to use for logs written to stdout
[default: ]
--log.file.format <FORMAT>
The format to use for logs written to the log file
[default: terminal]
Possible values:
- json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging
- log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications
- terminal: Represents terminal-friendly formatting for logs
--log.file.filter <FILTER>
The filter to use for logs written to the log file
[default: debug]
--log.file.directory <PATH>
The path to put log files in
[default: <CACHE_DIR>/logs]
--log.file.max-size <SIZE>
The maximum size (in MB) of one log file
[default: 200]
--log.file.max-files <COUNT>
The maximum amount of log files that will be stored. If set to 0, background file logging is disabled
[default: 5]
--log.journald
Write logs to journald
--log.journald.filter <FILTER>
The filter to use for logs written to journald
[default: error]
--color <COLOR>
Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting
[default: always]
Possible values:
- always: Colors on
- auto: Colors on
- never: Colors off
Display:
-v, --verbosity...
Set the minimum log level.
-v Errors
-vv Warnings
-vvv Info
-vvvv Debug
-vvvvv Traces (warning: very verbose!)
-q, --quiet
Silence all log output
```

View File

@@ -70,8 +70,10 @@ ahash.workspace = true
human_bytes.workspace = true
eyre.workspace = true
clap = { workspace = true, features = ["derive", "env"] }
lz4.workspace = true
serde.workspace = true
serde_json.workspace = true
tar.workspace = true
tracing.workspace = true
backon.workspace = true
secp256k1 = { workspace = true, features = ["global-context", "std", "recovery"] }

View File

@@ -0,0 +1,199 @@
use crate::common::EnvironmentArgs;
use clap::Parser;
use eyre::Result;
use lz4::Decoder;
use reqwest::Client;
use reth_chainspec::{EthChainSpec, EthereumHardforks};
use reth_cli::chainspec::ChainSpecParser;
use reth_fs_util as fs;
use std::{
io::{self, Read, Write},
path::Path,
sync::Arc,
time::{Duration, Instant},
};
use tar::Archive;
use tokio::task;
use tracing::info;
/// Unit suffixes used when rendering byte counts, ordered from smallest to largest.
///
/// Consecutive entries differ by a factor of 1024. The table goes up to `TB` so that
/// multi-terabyte snapshots are not displayed as thousands of `GB`.
const BYTE_UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
/// Base URL of the default snapshot host; `latest.txt` and the archives are resolved against it.
const MERKLE_BASE_URL: &str = "https://downloads.merkle.io";
/// Suffix the default snapshot filename must end with (LZ4-compressed tar archive).
const EXTENSION_TAR_FILE: &str = ".tar.lz4";
// Command-line arguments of the `download` subcommand, parsed by clap.
//
// NOTE: plain `//` comments are used on purpose. `///` doc comments on a
// clap-derived type are picked up by the derive macro as help/about text.
#[derive(Debug, Parser)]
pub struct DownloadCommand<C: ChainSpecParser> {
// Shared environment arguments, flattened into this command's own option list.
// `execute` reads the data directory and the chain from here.
#[command(flatten)]
env: EnvironmentArgs<C>,
// Optional snapshot URL, exposed as `--url` / `-u`.
// When it is not given, `execute` falls back to `get_latest_snapshot_url`.
#[arg(
long,
short,
help = "Custom URL to download the snapshot from",
long_help = "Specify a snapshot URL or let the command propose a default one.\n\
\n\
Available snapshot sources:\n\
- https://downloads.merkle.io (default, mainnet archive)\n\
- https://publicnode.com/snapshots (full nodes & testnets)\n\
\n\
If no URL is provided, the latest mainnet archive snapshot\n\
will be proposed for download from merkle.io"
)]
url: Option<String>,
}
impl<C: ChainSpecParser<ChainSpec: EthChainSpec + EthereumHardforks>> DownloadCommand<C> {
    /// Runs the command: prepares the data directory, decides which snapshot URL to use,
    /// then downloads the archive and unpacks it into the data directory.
    ///
    /// # Errors
    ///
    /// Fails if the data directory cannot be created, if the default URL cannot be
    /// resolved, or if the download/extraction step reports an error.
    pub async fn execute<N>(self) -> Result<()> {
        let Self { env, url } = self;

        let data_dir = env.datadir.resolve_datadir(env.chain.chain());
        fs::create_dir_all(&data_dir)?;

        // A URL given on the command line wins; otherwise ask the default host.
        let snapshot_url = if let Some(custom) = url {
            custom
        } else {
            let latest = get_latest_snapshot_url().await?;
            info!(target: "reth::cli", "Using default snapshot URL: {}", latest);
            latest
        };

        info!(target: "reth::cli",
            chain = %env.chain.chain(),
            dir = ?data_dir.data_dir(),
            url = %snapshot_url,
            "Starting snapshot download and extraction"
        );

        stream_and_extract(&snapshot_url, data_dir.data_dir()).await?;
        info!(target: "reth::cli", "Snapshot downloaded and extracted successfully");

        Ok(())
    }
}
impl<C: ChainSpecParser> DownloadCommand<C> {
    /// Gives access to the chain specification this command was configured with.
    ///
    /// The value is taken straight from the environment arguments, so this always
    /// yields `Some`.
    pub fn chain_spec(&self) -> Option<&Arc<C::ChainSpec>> {
        let spec = &self.env.chain;
        Some(spec)
    }
}
/// Tracks how many bytes have been read so far and renders a single-line progress
/// indicator on stdout.
///
/// The display is refreshed at most once every 100ms to avoid overwhelming stdout.
struct DownloadProgress {
    /// Number of bytes reported through [`DownloadProgress::update`] so far.
    downloaded: u64,
    /// Expected total number of bytes, as passed to [`DownloadProgress::new`].
    total_size: u64,
    /// Moment of the most recent redraw; used to throttle output.
    last_displayed: Instant,
}

impl DownloadProgress {
    /// Creates new progress tracker with given total size
    fn new(total_size: u64) -> Self {
        Self { downloaded: 0, total_size, last_displayed: Instant::now() }
    }

    /// Converts a byte count to a human readable string with two decimals.
    ///
    /// The value is divided by 1024 until it drops below 1024 or the largest unit in
    /// `BYTE_UNITS` is reached, whichever comes first.
    fn format_size(size: u64) -> String {
        let mut size = size as f64;
        let mut unit_index = 0;

        while size >= 1024.0 && unit_index < BYTE_UNITS.len() - 1 {
            size /= 1024.0;
            unit_index += 1;
        }

        format!("{:.2} {}", size, BYTE_UNITS[unit_index])
    }

    /// Returns the completed share of the download as a percentage.
    ///
    /// A total size of zero yields `0.0` instead of dividing by zero, which would
    /// otherwise surface as `NaN%` or `inf%` in the progress line.
    fn percent_complete(&self) -> f64 {
        if self.total_size == 0 {
            return 0.0;
        }
        (self.downloaded as f64 / self.total_size as f64) * 100.0
    }

    /// Records `chunk_size` additional bytes and redraws the progress line if the
    /// throttle interval has elapsed.
    ///
    /// # Errors
    ///
    /// Returns an error if flushing stdout fails.
    fn update(&mut self, chunk_size: u64) -> Result<()> {
        // Saturate rather than overflow; the counter is only used for display.
        self.downloaded = self.downloaded.saturating_add(chunk_size);

        // Only update display at most 10 times per second for efficiency
        if self.last_displayed.elapsed() >= Duration::from_millis(100) {
            let formatted_downloaded = Self::format_size(self.downloaded);
            let formatted_total = Self::format_size(self.total_size);
            let progress = self.percent_complete();

            // `\r` rewinds to the start of the line so the indicator is overwritten in place.
            print!(
                "\rDownloading and extracting... {progress:.2}% ({formatted_downloaded} / {formatted_total})",
            );
            io::stdout().flush()?;
            self.last_displayed = Instant::now();
        }

        Ok(())
    }
}
/// Reader wrapper that forwards every read to the inner reader and reports the number
/// of bytes obtained to a [`DownloadProgress`].
struct ProgressReader<R> {
    reader: R,
    progress: DownloadProgress,
}

impl<R: Read> ProgressReader<R> {
    /// Wraps `reader`, starting a fresh progress tracker for `total_size` bytes.
    fn new(reader: R, total_size: u64) -> Self {
        let progress = DownloadProgress::new(total_size);
        Self { reader, progress }
    }
}

impl<R: Read> Read for ProgressReader<R> {
    /// Reads from the inner reader and, for non-empty reads, updates the progress
    /// display. A failure to update the display is surfaced as an `io::Error`.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let read_len = self.reader.read(buf)?;

        // Nothing was read, so there is nothing to report.
        if read_len == 0 {
            return Ok(0);
        }

        self.progress.update(read_len as u64).map_err(io::Error::other)?;
        Ok(read_len)
    }
}
/// Fetches the archive at `url` with a blocking HTTP client and unpacks it into
/// `target_dir`.
///
/// The response body is wrapped in a progress-tracking reader, fed through an LZ4
/// decoder and handed to the tar extractor, so the archive is unpacked as it is read.
///
/// # Errors
///
/// Fails if the request fails or returns an error status, if the server does not
/// report a content length, or if decoding/unpacking fails.
fn blocking_download_and_extract(url: &str, target_dir: &Path) -> Result<()> {
    let http = reqwest::blocking::Client::builder().build()?;
    let response = http.get(url).send()?;
    let response = response.error_for_status()?;

    // The total size is needed up front to compute the progress percentage.
    let Some(total_size) = response.content_length() else {
        return Err(eyre::eyre!(
            "Server did not provide Content-Length header. This is required for snapshot downloads"
        ));
    };

    let tracked_body = ProgressReader::new(response, total_size);
    let lz4_stream = Decoder::new(tracked_body)?;
    Archive::new(lz4_stream).unpack(target_dir)?;

    info!(target: "reth::cli", "Extraction complete.");
    Ok(())
}
/// Async wrapper around [`blocking_download_and_extract`].
///
/// The blocking work is moved onto a `spawn_blocking` task; both arguments are copied
/// into owned values first so the closure can take ownership of them.
async fn stream_and_extract(url: &str, target_dir: &Path) -> Result<()> {
    let owned_dir = target_dir.to_owned();
    let owned_url = url.to_owned();

    let worker =
        task::spawn_blocking(move || blocking_download_and_extract(&owned_url, &owned_dir));

    // The first `?` handles a failed join; the inner result is returned as-is.
    worker.await?
}
/// Resolves the default snapshot URL.
///
/// Downloads `latest.txt` from the default host, treats its trimmed content as the
/// snapshot filename and joins it onto the host's base URL.
///
/// # Errors
///
/// Fails if the request fails or returns an error status, or if the filename does not
/// end with the expected archive extension.
async fn get_latest_snapshot_url() -> Result<String> {
    let pointer_url = format!("{MERKLE_BASE_URL}/latest.txt");

    let response = Client::new().get(pointer_url).send().await?.error_for_status()?;
    let body = response.text().await?;
    let filename = body.trim();

    if filename.ends_with(EXTENSION_TAR_FILE) {
        Ok(format!("{MERKLE_BASE_URL}/{filename}"))
    } else {
        Err(eyre::eyre!("Unexpected snapshot filename format: {}", filename))
    }
}

View File

@@ -11,6 +11,7 @@
pub mod common;
pub mod config_cmd;
pub mod db;
pub mod download;
pub mod dump_genesis;
pub mod import;
pub mod import_era;

View File

@@ -5,7 +5,7 @@ use clap::{Parser, Subcommand};
use reth_chainspec::ChainSpec;
use reth_cli::chainspec::ChainSpecParser;
use reth_cli_commands::{
config_cmd, db, dump_genesis, import, import_era, init_cmd, init_state,
config_cmd, db, download, dump_genesis, import, import_era, init_cmd, init_state,
node::{self, NoArgs},
p2p, prune, recover, stage,
};
@@ -170,6 +170,9 @@ impl<C: ChainSpecParser<ChainSpec = ChainSpec>, Ext: clap::Args + fmt::Debug> Cl
Commands::Db(command) => {
runner.run_blocking_until_ctrl_c(command.execute::<EthereumNode>())
}
Commands::Download(command) => {
runner.run_blocking_until_ctrl_c(command.execute::<EthereumNode>())
}
Commands::Stage(command) => runner.run_command_until_exit(|ctx| {
command.execute::<EthereumNode, _, _, EthNetworkPrimitives>(ctx, components)
}),
@@ -223,6 +226,9 @@ pub enum Commands<C: ChainSpecParser, Ext: clap::Args + fmt::Debug> {
/// Database debugging utilities
#[command(name = "db")]
Db(db::Command<C>),
/// Download public node snapshots
#[command(name = "download")]
Download(download::DownloadCommand<C>),
/// Manipulate individual stages.
#[command(name = "stage")]
Stage(stage::Command<C>),
@@ -258,6 +264,7 @@ impl<C: ChainSpecParser, Ext: clap::Args + fmt::Debug> Commands<C, Ext> {
Self::ImportEra(cmd) => cmd.chain_spec(),
Self::DumpGenesis(cmd) => cmd.chain_spec(),
Self::Db(cmd) => cmd.chain_spec(),
Self::Download(cmd) => cmd.chain_spec(),
Self::Stage(cmd) => cmd.chain_spec(),
Self::P2P(cmd) => cmd.chain_spec(),
#[cfg(feature = "dev")]

View File

@@ -61,7 +61,6 @@ exceptions = [
# TODO: decide on MPL-2.0 handling
# These dependencies are grandfathered in https://github.com/paradigmxyz/reth/pull/6980
{ allow = ["MPL-2.0"], name = "option-ext" },
{ allow = ["MPL-2.0"], name = "webpki-roots" },
{ allow = ["MPL-2.0"], name = "webpki-root-certs" },
]