From 9ab57f70e3e5686b59a6f93fc41363d46707de73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roman=20Hodul=C3=A1k?=
Date: Tue, 29 Apr 2025 16:50:43 +0200
Subject: [PATCH] feat(era): Implement SHA-256 checksum validation for local directory (#15987)

---
 crates/era-downloader/Cargo.toml     |  1 +
 crates/era-downloader/src/fs.rs      | 47 ++++++++++++-
 crates/era-downloader/tests/it/fs.rs | 99 +++++++++++++++++++++++++---
 3 files changed, 134 insertions(+), 13 deletions(-)

diff --git a/crates/era-downloader/Cargo.toml b/crates/era-downloader/Cargo.toml
index b22fc9c2f9..84a5187a70 100644
--- a/crates/era-downloader/Cargo.toml
+++ b/crates/era-downloader/Cargo.toml
@@ -32,6 +32,7 @@ eyre.workspace = true
 
 # crypto
 sha2.workspace = true
+sha2.features = ["std"]
 
 [dev-dependencies]
 tokio.workspace = true
diff --git a/crates/era-downloader/src/fs.rs b/crates/era-downloader/src/fs.rs
index baccc7d3a1..076c2f40f8 100644
--- a/crates/era-downloader/src/fs.rs
+++ b/crates/era-downloader/src/fs.rs
@@ -1,15 +1,29 @@
 use crate::EraMeta;
+use alloy_primitives::{hex, hex::ToHexExt};
+use eyre::{eyre, OptionExt};
 use futures_util::{stream, Stream};
 use reth_fs_util as fs;
-use std::{fmt::Debug, path::Path, str::FromStr};
+use sha2::{Digest, Sha256};
+use std::{fmt::Debug, io, io::BufRead, path::Path, str::FromStr};
 
 /// Creates a new ordered asynchronous [`Stream`] of ERA1 files read from `dir`.
+///
+/// `dir` must also contain a `checksums.txt` file holding one hex-encoded SHA-256 checksum per
+/// line. After the ERA1 files are sorted, the n-th file is verified against the n-th checksum
+/// line; hashing happens when the corresponding stream item is produced.
+///
+/// # Errors
+///
+/// Fails up front if `dir` cannot be read or `checksums.txt` is missing. A stream item is an
+/// error if its checksum line is missing or not valid hex, if the file cannot be read, or if
+/// its SHA-256 digest does not match.
 pub fn read_dir(
     dir: impl AsRef<Path> + Send + Sync + 'static,
 ) -> eyre::Result<impl Stream<Item = eyre::Result<EraLocalMeta>> + Send + Sync + 'static + Unpin> {
+    let mut checksums = None;
     let mut entries = fs::read_dir(dir)?
         .filter_map(|entry| {
-            (move || {
+            (|| {
                 let path = entry?.path();
 
                 if path.extension() == Some("era1".as_ref()) {
@@ -24,16 +38,43 @@ pub fn read_dir(
                         }
                     }
                 }
+                if path.file_name() == Some("checksums.txt".as_ref()) {
+                    let file = fs::open(path)?;
+                    let reader = io::BufReader::new(file);
+                    let lines = reader.lines();
+                    checksums = Some(lines);
+                }
 
                 Ok(None)
             })()
             .transpose()
         })
         .collect::<eyre::Result<Vec<_>>>()?;
+    let mut checksums = checksums.ok_or_eyre("Missing file `checksums.txt` in the `dir`")?;
 
     entries.sort_by(|(left, _), (right, _)| left.cmp(right));
 
-    Ok(stream::iter(entries.into_iter().map(|(_, v)| Ok(EraLocalMeta::new(v)))))
+    Ok(stream::iter(entries.into_iter().map(move |(_, path)| {
+        let expected_checksum =
+            checksums.next().transpose()?.ok_or_eyre("Got less checksums than ERA files")?;
+        let expected_checksum = hex::decode(expected_checksum.trim())?;
+
+        let mut hasher = Sha256::new();
+        let mut reader = io::BufReader::new(fs::open(&path)?);
+
+        io::copy(&mut reader, &mut hasher)?;
+        let actual_checksum = hasher.finalize().to_vec();
+
+        if actual_checksum != expected_checksum {
+            return Err(eyre!(
+                "Checksum mismatch, got: {}, expected: {}",
+                actual_checksum.encode_hex(),
+                expected_checksum.encode_hex()
+            ));
+        }
+
+        Ok(EraLocalMeta::new(path))
+    })))
 }
 
 /// Contains information about an ERA file that is on the local file-system and is read-only.
diff --git a/crates/era-downloader/tests/it/fs.rs b/crates/era-downloader/tests/it/fs.rs
index d2871027de..5ad7ba2800 100644
--- a/crates/era-downloader/tests/it/fs.rs
+++ b/crates/era-downloader/tests/it/fs.rs
@@ -1,25 +1,104 @@
+use alloy_primitives::hex::ToHexExt;
 use futures_util::StreamExt;
 use reth_era_downloader::read_dir;
+use sha2::Digest;
 use tokio::fs;
 
+const CONTENTS_0: &[u8; 1] = b"a";
+const CONTENTS_1: &[u8; 1] = b"b";
+
+#[test_case::test_case(
+    Ok(format!(
+        "{}\n{}",
+        sha2::Sha256::digest(CONTENTS_0).encode_hex(),
+        sha2::Sha256::digest(CONTENTS_1).encode_hex()
+    )),
+    [
+        Ok("mainnet-00000-5ec1ffb8.era1"),
+        Ok("mainnet-00001-a5364e9a.era1"),
+    ];
+    "Reads all files successfully"
+)]
+#[test_case::test_case(
+    Ok("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\
+        bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
+    [
+        Err("Checksum mismatch, \
+            got: ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb, \
+            expected: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+        Err("Checksum mismatch, \
+            got: 3e23e8160039594a33894f6564e1b1348bbd7a0088d42c4acb73eeaed59c009d, \
+            expected: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
+    ];
+    "With invalid checksums fails"
+)]
+#[test_case::test_case(
+    Ok(format!(
+        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n{}",
+        sha2::Sha256::digest(CONTENTS_1).encode_hex()
+    )),
+    [
+        Err("Checksum mismatch, \
+            got: ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb, \
+            expected: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+        Ok("mainnet-00001-a5364e9a.era1"),
+    ];
+    "With one invalid checksum partially fails"
+)]
+#[test_case::test_case(
+    Err::<&str, _>("Missing file `checksums.txt` in the `dir`"),
+    [
+        Err("Checksum mismatch, \
+            got: ca978112ca1bbdcafac231b39a23dc4da786eff8147c4e72b9807785afee48bb, \
+            expected: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
+        Ok("mainnet-00001-a5364e9a.era1"),
+    ];
+    "With missing checksums file fails"
+)]
 #[tokio::test]
-async fn test_streaming_from_local_directory() {
+async fn test_streaming_from_local_directory(
+    checksums: Result<impl AsRef<[u8]>, &str>, // `Err` = no checksums file is written
+    expected: [Result<&str, &str>; 2], // expected outcome per ERA1 file
+) {
     let folder = tempfile::tempdir().unwrap();
     let folder = folder.path().to_owned();
 
-    fs::write(folder.join("mainnet-00000-5ec1ffb8.era1"), b"").await.unwrap();
-    fs::write(folder.join("mainnet-00001-a5364e9a.era1"), b"").await.unwrap();
+    if let Ok(checksums) = &checksums {
+        fs::write(folder.join("checksums.txt"), checksums).await.unwrap();
+    }
+    fs::write(folder.join("mainnet-00000-5ec1ffb8.era1"), CONTENTS_0).await.unwrap();
+    fs::write(folder.join("mainnet-00001-a5364e9a.era1"), CONTENTS_1).await.unwrap();
 
     let folder = folder.into_boxed_path();
-    let mut stream = read_dir(folder.clone()).unwrap();
+    let actual = read_dir(folder.clone());
 
-    let expected_file = folder.join("mainnet-00000-5ec1ffb8.era1").into_boxed_path();
-    let actual_file = stream.next().await.unwrap().unwrap();
+    match checksums {
+        Ok(_) => match actual {
+            Ok(mut stream) => {
+                for expected in expected {
+                    let actual = stream.next().await.unwrap();
 
-    assert_eq!(actual_file, expected_file);
+                    match expected {
+                        Ok(expected_file) => {
+                            let actual_file = actual.expect("should be ok");
+                            let expected_file = folder.join(expected_file).into_boxed_path();
 
-    let expected_file = folder.join("mainnet-00001-a5364e9a.era1").into_boxed_path();
-    let actual_file = stream.next().await.unwrap().unwrap();
+                            assert_eq!(actual_file, expected_file)
+                        }
+                        Err(expected_err) => {
+                            let actual_err = actual.expect_err("should be err").to_string();
 
-    assert_eq!(actual_file, expected_file);
+                            assert_eq!(actual_err, expected_err)
+                        }
+                    }
+                }
+            }
+
+            Err(err) => panic!("expected ok, got: {err:?}"),
+        },
+        Err(expected_err) => match actual {
+            Ok(_) => panic!("should be err"),
+            Err(actual_err) => assert_eq!(actual_err.to_string(), expected_err),
+        },
+    }
 }