diff --git a/crates/cli/commands/src/download/mod.rs b/crates/cli/commands/src/download/mod.rs index 90b48ffcb9..2edfe309ad 100644 --- a/crates/cli/commands/src/download/mod.rs +++ b/crates/cli/commands/src/download/mod.rs @@ -43,7 +43,8 @@ use url::Url; use zstd::stream::read::Decoder as ZstdDecoder; const BYTE_UNITS: [&str; 4] = ["B", "KB", "MB", "GB"]; -const MERKLE_BASE_URL: &str = "https://downloads.merkle.io"; +const RETH_SNAPSHOTS_BASE_URL: &str = "https://snapshots-r2.reth.rs"; +const RETH_SNAPSHOTS_API_URL: &str = "https://snapshots.reth.rs/api/snapshots"; const EXTENSION_TAR_LZ4: &str = ".tar.lz4"; const EXTENSION_TAR_ZSTD: &str = ".tar.zst"; const DOWNLOAD_CACHE_DIR: &str = ".download-cache"; @@ -98,14 +99,14 @@ impl DownloadDefaults { DOWNLOAD_DEFAULTS.get_or_init(DownloadDefaults::default_download_defaults) } - /// Default download configuration with defaults from merkle.io and publicnode + /// Default download configuration with defaults from snapshots.reth.rs and publicnode pub fn default_download_defaults() -> Self { Self { available_snapshots: vec![ - Cow::Borrowed("https://www.merkle.io/snapshots (default, mainnet archive)"), + Cow::Borrowed("https://snapshots.reth.rs (default)"), Cow::Borrowed("https://publicnode.com/snapshots (full nodes & testnets)"), ], - default_base_url: Cow::Borrowed(MERKLE_BASE_URL), + default_base_url: Cow::Borrowed(RETH_SNAPSHOTS_BASE_URL), default_chain_aware_base_url: None, long_help: None, } @@ -121,7 +122,9 @@ impl DownloadDefaults { } let mut help = String::from( - "Specify a snapshot URL or let the command propose a default one.\n\nAvailable snapshot sources:\n", + "Specify a snapshot URL or let the command propose a default one.\n\n\ + Browse available snapshots at https://snapshots.reth.rs\n\ + or use --list-snapshots to see them from the CLI.\n\nAvailable snapshot sources:\n", ); for source in &self.available_snapshots { @@ -188,6 +191,7 @@ pub struct DownloadCommand { /// Custom URL to download a single 
snapshot archive (legacy mode). /// /// When provided, downloads and extracts a single archive without component selection. + /// Browse available snapshots at <https://snapshots.reth.rs> or use --list-snapshots. #[arg(long, short, long_help = DownloadDefaults::get_global().long_help())] url: Option, @@ -248,6 +252,13 @@ pub struct DownloadCommand { /// Maximum number of concurrent modular archive workers. #[arg(long, default_value_t = MAX_CONCURRENT_DOWNLOADS)] download_concurrency: usize, + + /// List available snapshots from snapshots.reth.rs and exit. + /// + /// Queries the snapshots API and prints all available snapshots for the selected chain, + /// including block number, size, and manifest URL. + #[arg(long, alias = "list-snapshots", conflicts_with_all = ["url", "manifest_url", "manifest_path"])] + list: bool, } impl> DownloadCommand { @@ -260,8 +271,15 @@ impl> DownloadCo let cancel_token = CancellationToken::new(); let _cancel_guard = cancel_token.drop_guard(); + // --list: print available snapshots and exit + if self.list { + let entries = fetch_snapshot_api_entries(chain_id).await?; + print_snapshot_listing(&entries, chain_id); + return Ok(()); + } + // Legacy single-URL mode: download one archive and extract it - if let Some(url) = self.url { + if let Some(ref url) = self.url { info!(target: "reth::cli", dir = ?data_dir.data_dir(), url = %url, @@ -269,7 +287,7 @@ impl> DownloadCo ); stream_and_extract( - &url, + url, data_dir.data_dir(), None, self.resumable, @@ -282,7 +300,7 @@ impl> DownloadCo } // Modular download: fetch manifest and select components - let manifest_source = self.resolve_manifest_source(chain_id); + let manifest_source = self.resolve_manifest_source(chain_id).await?; info!(target: "reth::cli", source = %manifest_source, "Fetching snapshot manifest"); let mut manifest = fetch_manifest_from_source(&manifest_source).await?; @@ -621,17 +639,14 @@ impl> DownloadCo } } - fn resolve_manifest_source(&self, chain_id: u64) -> String { + async fn 
resolve_manifest_source(&self, chain_id: u64) -> Result { if let Some(path) = &self.manifest_path { - return path.display().to_string(); + return Ok(path.display().to_string()); } match &self.manifest_url { - Some(url) => url.clone(), - None => { - let base_url = get_base_url(chain_id); - format!("{base_url}/manifest.json") - } + Some(url) => Ok(url.clone()), + None => discover_manifest_url(chain_id).await, } } } @@ -1572,15 +1587,128 @@ fn file_blake3_hex(path: &Path) -> Result { Ok(hasher.finalize().to_hex().to_string()) } -/// Builds the base URL for the given chain ID using configured defaults. -fn get_base_url(chain_id: u64) -> String { - let defaults = DownloadDefaults::get_global(); - match &defaults.default_chain_aware_base_url { - Some(url) => format!("{url}/{chain_id}"), - None => defaults.default_base_url.to_string(), +/// Discovers the latest snapshot manifest URL for the given chain from the snapshots API. +/// +/// Queries `snapshots.reth.rs/api/snapshots` and returns the manifest URL for the most +/// recent modular snapshot matching the requested chain. +async fn discover_manifest_url(chain_id: u64) -> Result { + let api_url = RETH_SNAPSHOTS_API_URL; + + info!(target: "reth::cli", %api_url, %chain_id, "Discovering latest snapshot manifest"); + + let entries = fetch_snapshot_api_entries(chain_id).await?; + + let entry = + entries.iter().filter(|s| s.is_modular()).max_by_key(|s| s.block).ok_or_else(|| { + eyre::eyre!( + "No modular snapshot manifest found for chain \ + {chain_id} at {api_url}\n\n\ + You can provide a manifest URL directly with --manifest-url, or\n\ + use a direct snapshot URL with -u from:\n\ + \t- https://snapshots.reth.rs\n\n\ + Use --list to see all available snapshots." + ) + })?; + + info!(target: "reth::cli", + block = entry.block, + url = %entry.metadata_url, + "Found latest snapshot manifest" + ); + + Ok(entry.metadata_url.clone()) +} + +/// Deserializes a JSON value that may be either a number or a string-encoded number. 
+fn deserialize_string_or_u64<'de, D>(deserializer: D) -> std::result::Result +where + D: serde::Deserializer<'de>, +{ + use serde::Deserialize; + let value = serde_json::Value::deserialize(deserializer)?; + match &value { + serde_json::Value::Number(n) => { + n.as_u64().ok_or_else(|| serde::de::Error::custom("expected u64")) + } + serde_json::Value::String(s) => { + s.parse::().map_err(|_| serde::de::Error::custom("expected numeric string")) + } + _ => Err(serde::de::Error::custom("expected number or string")), } } +/// An entry from the `snapshots.reth.rs/api/snapshots` listing. +#[derive(serde::Deserialize)] +#[serde(rename_all = "camelCase")] +struct SnapshotApiEntry { + #[serde(deserialize_with = "deserialize_string_or_u64")] + chain_id: u64, + #[serde(deserialize_with = "deserialize_string_or_u64")] + block: u64, + #[serde(default)] + date: Option, + #[serde(default)] + profile: Option, + metadata_url: String, + #[serde(default)] + size: u64, +} + +impl SnapshotApiEntry { + fn is_modular(&self) -> bool { + self.metadata_url.ends_with("manifest.json") + } +} + +/// Fetches the full snapshot listing from the snapshots API, filtered by chain ID. +async fn fetch_snapshot_api_entries(chain_id: u64) -> Result> { + let api_url = RETH_SNAPSHOTS_API_URL; + + let entries: Vec = Client::new() + .get(api_url) + .send() + .await + .and_then(|r| r.error_for_status()) + .wrap_err_with(|| format!("Failed to fetch snapshot listing from {api_url}"))? + .json() + .await?; + + Ok(entries.into_iter().filter(|e| e.chain_id == chain_id).collect()) +} + +/// Prints a formatted table of available modular snapshots. 
+fn print_snapshot_listing(entries: &[SnapshotApiEntry], chain_id: u64) { + let modular: Vec<_> = entries.iter().filter(|e| e.is_modular()).collect(); + + println!("Available snapshots for chain {chain_id} (https://snapshots.reth.rs):\n"); + println!("{:<12} {:>10} {:<10} {:>10} MANIFEST URL", "DATE", "BLOCK", "PROFILE", "SIZE"); + println!("{}", "-".repeat(100)); + + for entry in &modular { + let date = entry.date.as_deref().unwrap_or("-"); + let profile = entry.profile.as_deref().unwrap_or("-"); + let size = if entry.size > 0 { + DownloadProgress::format_size(entry.size) + } else { + "-".to_string() + }; + + println!( + "{date:<12} {:>10} {profile:<10} {size:>10} {}", + entry.block, entry.metadata_url + ); + } + + if modular.is_empty() { + println!(" (no modular snapshots found)"); + } + + println!( + "\nTo download a specific snapshot, copy its manifest URL and run:\n \ + reth download --manifest-url " + ); +} + async fn fetch_manifest_from_source(source: &str) -> Result { if let Ok(parsed) = Url::parse(source) { return match parsed.scheme() { @@ -1597,7 +1725,7 @@ async fn fetch_manifest_from_source(source: &str) -> Result { You can use a direct snapshot URL instead:\n\n\ \treth download -u \n\n\ Available snapshot sources:\n\ - \t- https://www.merkle.io/snapshots\n\ + \t- https://snapshots.reth.rs\n\ \t- https://publicnode.com/snapshots" ) })?; @@ -1666,26 +1794,6 @@ fn resolve_manifest_base_url(manifest: &SnapshotManifest, source: &str) -> Resul Ok(base) } -/// Builds default URL for latest mainnet archive snapshot using configured defaults. -/// -/// Used by the legacy single-archive download flow when no manifest is available. -#[allow(dead_code)] -async fn get_latest_snapshot_url(chain_id: u64) -> Result { - let base_url = get_base_url(chain_id); - let latest_url = format!("{base_url}/latest.txt"); - let filename = Client::new() - .get(latest_url) - .send() - .await? - .error_for_status()? - .text() - .await? 
- .trim() - .to_string(); - - Ok(format!("{base_url}/{filename}")) -} - #[cfg(test)] mod tests { use super::*; @@ -1750,7 +1858,7 @@ mod tests { let help = defaults.long_help(); assert!(help.contains("Available snapshot sources:")); - assert!(help.contains("merkle.io")); + assert!(help.contains("snapshots.reth.rs")); assert!(help.contains("publicnode.com")); assert!(help.contains("file://")); } diff --git a/docs/vocs/docs/pages/cli/reth/download.mdx b/docs/vocs/docs/pages/cli/reth/download.mdx index 91f77d5e0c..0a5698c317 100644 --- a/docs/vocs/docs/pages/cli/reth/download.mdx +++ b/docs/vocs/docs/pages/cli/reth/download.mdx @@ -115,12 +115,15 @@ Storage: -u, --url Specify a snapshot URL or let the command propose a default one. + Browse available snapshots at https://snapshots.reth.rs + or use --list-snapshots to see them from the CLI. + Available snapshot sources: - - https://www.merkle.io/snapshots (default, mainnet archive) + - https://snapshots.reth.rs (default) - https://publicnode.com/snapshots (full nodes & testnets) If no URL is provided, the latest archive snapshot for the selected chain - will be proposed for download from https://downloads.merkle.io. + will be proposed for download from https://snapshots-r2.reth.rs. Local file:// URLs are also supported for extracting snapshots from disk. @@ -168,6 +171,11 @@ Storage: [default: 8] + --list + List available snapshots from snapshots.reth.rs and exit. + + Queries the snapshots API and prints all available snapshots for the selected chain, including block number, size, and manifest URL. + Logging: --log.stdout.format The format to use for logs written to stdout