From fad9c04b07fc330b905b680b7a45778d0313db38 Mon Sep 17 00:00:00 2001 From: Georgios Konstantopoulos Date: Thu, 16 Feb 2023 23:28:46 -0800 Subject: [PATCH] feat: Account Hashing Stage Benchmark (#1301) Co-authored-by: joshieDo --- Cargo.lock | 274 +++++++++++++++++- bin/reth/src/dump_stage/hashing_account.rs | 98 +++++++ bin/reth/src/dump_stage/mod.rs | 8 + .../interfaces/src/test_utils/generators.rs | 2 +- crates/stages/Cargo.toml | 2 + crates/stages/benches/README.md | 18 +- crates/stages/benches/criterion.rs | 100 ++++--- .../stages/benches/setup/account_hashing.rs | 85 ++++++ crates/stages/benches/setup/constants.rs | 1 + crates/stages/benches/setup/mod.rs | 43 +++ crates/stages/src/stages/hashing_account.rs | 189 +++++++----- 11 files changed, 682 insertions(+), 138 deletions(-) create mode 100644 bin/reth/src/dump_stage/hashing_account.rs create mode 100644 crates/stages/benches/setup/account_hashing.rs create mode 100644 crates/stages/benches/setup/constants.rs create mode 100644 crates/stages/benches/setup/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 77c6ec9803..e612bf8ab3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "addr2line" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aead" version = "0.4.3" @@ -67,6 +82,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ "cfg-if", + "getrandom 0.2.8", "once_cell", "version_check", ] @@ -262,6 +278,21 @@ dependencies = [ "tokio", ] +[[package]] +name = "backtrace" +version = "0.3.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "base16ct" version = "0.1.1" @@ -495,6 +526,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +[[package]] +name = "bytemuck" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c041d3eab048880cb0b86b256447da3f18859a163c3b8d8893f4e6368abe6393" + [[package]] name = "byteorder" version = "1.4.3" @@ -860,6 +897,15 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "cpp_demangle" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b446fd40bcc17eddd6a4a78f24315eb90afdb3334999ddfd4909985c47722442" +dependencies = [ + "cfg-if", +] + [[package]] name 
= "cpufeatures" version = "0.2.5" @@ -1187,6 +1233,15 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid 1.3.0", +] + [[package]] name = "delay_map" version = "0.1.2" @@ -1602,7 +1657,7 @@ dependencies = [ "sha2 0.10.6", "sha3", "thiserror", - "uuid", + "uuid 0.8.2", ] [[package]] @@ -1851,6 +1906,18 @@ dependencies = [ "subtle", ] +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + [[package]] name = "fixed-hash" version = "0.8.0" @@ -2070,6 +2137,12 @@ dependencies = [ "polyval", ] +[[package]] +name = "gimli" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" + [[package]] name = "glob" version = "0.3.1" @@ -2278,6 +2351,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -2570,6 +2649,24 @@ dependencies = [ "serde", ] +[[package]] +name = "inferno" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fb7c1b80a1dfa604bb4a649a5c5aeef3d913f7c520cb42b40e534e8a61bcdfc" +dependencies = [ + "ahash 0.8.3", + "indexmap", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml", + "rgb", + "str_stack", +] + [[package]] name = 
"inout" version = "0.1.3" @@ -2599,7 +2696,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -2622,14 +2719,14 @@ checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" [[package]] name = "is-terminal" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +checksum = "22e18b0a45d56fe973d6db23972bf5bc46f988a4a2385deac9cc29572f09daef" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.1", "io-lifetimes", "rustix", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -3015,6 +3112,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.7.1" @@ -3098,6 +3204,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" +[[package]] +name = "miniz_oxide" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +dependencies = [ + "adler", +] + [[package]] name = "mio" version = "0.8.5" @@ -3107,7 +3222,7 @@ dependencies = [ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -3146,6 +3261,17 @@ dependencies = [ "smallvec", ] +[[package]] +name = "nix" +version = "0.24.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + [[package]] name = "nom" version = "7.1.3" @@ -3209,6 +3335,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -3282,6 +3418,15 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "object" +version = "0.30.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.17.1" @@ -3460,7 +3605,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -3641,6 +3786,28 @@ dependencies = [ "serde", ] +[[package]] +name = "pprof" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e20150f965e0e4c925982b9356da71c84bcd56cb66ef4e894825837cbcf6613e" +dependencies = [ + "backtrace", + "cfg-if", + "criterion", + "findshlibs", + "inferno", + "libc", + "log", + "nix", + "once_cell", + "parking_lot 0.12.1", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -3793,6 +3960,15 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + 
[[package]] name = "quote" version = "0.6.13" @@ -4744,6 +4920,7 @@ dependencies = [ "async-trait", "cita_trie", "criterion", + "eyre", "futures-util", "hasher", "itertools 0.10.5", @@ -4751,6 +4928,7 @@ dependencies = [ "num-traits", "paste", "pin-project", + "pprof", "proptest", "rand 0.8.5", "rayon", @@ -4891,6 +5069,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rgb" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7495acf66551cdb696b7711408144bcd3194fc78e32f3a09e809bfe7dd4a7ce3" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -4959,6 +5146,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62cc5760263ea229d367e7dff3c0cbf09e4797a125bd87059a6c095804f3b2d1" +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -4991,7 +5184,7 @@ dependencies = [ "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -5099,7 +5292,7 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3" dependencies = [ - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -5613,6 +5806,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "strsim" version = "0.9.3" @@ -5685,6 +5884,29 @@ dependencies = [ "anyhow", ] +[[package]] +name = "symbolic-common" +version = 
"10.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b55cdc318ede251d0957f07afe5fed912119b8c1bc5a7804151826db999e737" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid 1.3.0", +] + +[[package]] +name = "symbolic-demangle" +version = "10.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79be897be8a483a81fff6a3a4e195b4ac838ef73ca42d348b3f722da9902e489" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "0.15.44" @@ -5918,7 +6140,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] @@ -6468,6 +6690,12 @@ dependencies = [ "serde", ] +[[package]] +name = "uuid" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" + [[package]] name = "valuable" version = "0.1.0" @@ -6702,6 +6930,30 @@ dependencies = [ "windows_x86_64_msvc", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.1" diff --git a/bin/reth/src/dump_stage/hashing_account.rs b/bin/reth/src/dump_stage/hashing_account.rs new file mode 100644 index 0000000000..e5f6ccf019 --- /dev/null +++ 
b/bin/reth/src/dump_stage/hashing_account.rs @@ -0,0 +1,98 @@ +use crate::{ + db::DbTool, + dirs::{DbPath, PlatformPath}, + dump_stage::setup, +}; +use eyre::Result; +use reth_db::{database::Database, table::TableImporter, tables, transaction::DbTx}; +use reth_provider::Transaction; +use reth_stages::{stages::AccountHashingStage, Stage, StageId, UnwindInput}; +use std::ops::DerefMut; +use tracing::info; + +pub(crate) async fn dump_hashing_account_stage( + db_tool: &mut DbTool<'_, DB>, + from: u64, + to: u64, + output_db: &PlatformPath, + should_run: bool, +) -> Result<()> { + let (output_db, tip_block_number) = setup::(from, to, output_db, db_tool)?; + + // Import relevant AccountChangeSets + let tx = db_tool.db.tx()?; + let from_transition_rev = + tx.get::(from)?.expect("there should be at least one."); + let to_transition_rev = + tx.get::(to)?.expect("there should be at least one."); + output_db.update(|tx| { + tx.import_table_with_range::( + &db_tool.db.tx()?, + Some(from_transition_rev), + to_transition_rev, + ) + })??; + + unwind_and_copy::(db_tool, from, tip_block_number, &output_db).await?; + + if should_run { + println!("\n# AccountHashing stage does not support dry run, so it will actually be committing changes."); + run(output_db, to, from).await?; + } + + Ok(()) +} + +/// Dry-run an unwind to FROM block and copy the necessary table data to the new database. 
+async fn unwind_and_copy( + db_tool: &mut DbTool<'_, DB>, + from: u64, + tip_block_number: u64, + output_db: &reth_db::mdbx::Env, +) -> eyre::Result<()> { + let mut unwind_tx = Transaction::new(db_tool.db)?; + let mut exec_stage = AccountHashingStage::default(); + + exec_stage + .unwind( + &mut unwind_tx, + UnwindInput { unwind_to: from, stage_progress: tip_block_number, bad_block: None }, + ) + .await?; + let unwind_inner_tx = unwind_tx.deref_mut(); + + output_db.update(|tx| tx.import_table::(unwind_inner_tx))??; + + unwind_tx.drop()?; + + Ok(()) +} + +/// Try to re-execute the stage straightaway +async fn run( + output_db: reth_db::mdbx::Env, + to: u64, + from: u64, +) -> eyre::Result<()> { + info!(target: "reth::cli", "Executing stage."); + + let mut tx = Transaction::new(&output_db)?; + let mut exec_stage = AccountHashingStage { + clean_threshold: 1, // Forces hashing from scratch + ..Default::default() + }; + + exec_stage + .execute( + &mut tx, + reth_stages::ExecInput { + previous_stage: Some((StageId("Another"), to)), + stage_progress: Some(from), + }, + ) + .await?; + + info!(target: "reth::cli", "Success."); + + Ok(()) +} diff --git a/bin/reth/src/dump_stage/mod.rs b/bin/reth/src/dump_stage/mod.rs index 78cac15ce3..0bc3863a34 100644 --- a/bin/reth/src/dump_stage/mod.rs +++ b/bin/reth/src/dump_stage/mod.rs @@ -2,6 +2,9 @@ mod hashing_storage; use hashing_storage::dump_hashing_storage_stage; +mod hashing_account; +use hashing_account::dump_hashing_account_stage; + mod execution; use execution::dump_execution_stage; @@ -40,6 +43,8 @@ pub enum Stages { Execution(StageCommand), /// StorageHashing stage. StorageHashing(StageCommand), + /// AccountHashing stage. + AccountHashing(StageCommand), } /// Stage command that takes a range @@ -86,6 +91,9 @@ impl Command { Stages::StorageHashing(StageCommand { output_db, from, to, dry_run, .. }) => { dump_hashing_storage_stage(&mut tool, *from, *to, output_db, *dry_run).await? 
} + Stages::AccountHashing(StageCommand { output_db, from, to, dry_run, .. }) => { + dump_hashing_account_stage(&mut tool, *from, *to, output_db, *dry_run).await? + } } Ok(()) diff --git a/crates/interfaces/src/test_utils/generators.rs b/crates/interfaces/src/test_utils/generators.rs index 5e756e668d..4d0b7fa07f 100644 --- a/crates/interfaces/src/test_utils/generators.rs +++ b/crates/interfaces/src/test_utils/generators.rs @@ -175,7 +175,7 @@ pub fn random_eoa_account() -> (Address, Account) { } /// Generate random Externaly Owned Accounts -pub fn random_eoa_account_range(acc_range: &mut std::ops::Range) -> Vec<(Address, Account)> { +pub fn random_eoa_account_range(acc_range: std::ops::Range) -> Vec<(Address, Account)> { let mut accounts = Vec::with_capacity(acc_range.end.saturating_sub(acc_range.start) as usize); for _ in acc_range { accounts.push(random_eoa_account()) diff --git a/crates/stages/Cargo.toml b/crates/stages/Cargo.toml index a5fb4078d6..2a13ad9e3f 100644 --- a/crates/stages/Cargo.toml +++ b/crates/stages/Cargo.toml @@ -64,9 +64,11 @@ rand = "0.8.5" paste = "1.0" # Stage benchmarks +pprof = { version = "0.11", features = ["flamegraph", "frame-pointer", "criterion"] } criterion = { version = "0.4.0", features = ["async_futures"] } proptest = { version = "1.0" } arbitrary = { version = "1.1.7", features = ["derive"] } +eyre = "0.6.8" # trie reth-staged-sync = { path = "../staged-sync" } diff --git a/crates/stages/benches/README.md b/crates/stages/benches/README.md index 085994614a..cd3febde41 100644 --- a/crates/stages/benches/README.md +++ b/crates/stages/benches/README.md @@ -1,8 +1,22 @@ # Stage Benchmarks -Test vectors are automatically generated if they cannot be found. +Test vectors are automatically generated if they cannot be found. Furthermore, for certain stages you can link an external database to run the benchmarks. ## Usage + +It will run the normal criterion benchmark.
``` cargo bench --package reth-stages --bench criterion --features test-utils -``` \ No newline at end of file +``` + +It will generate a flamegraph report without running any criterion analysis. +``` +cargo bench --package reth-stages --bench criterion --features test-utils -- --profile-time=2 +``` +Flamegraph reports can be found at `target/criterion/Stages/$STAGE_LABEL/profile/flamegraph.svg` + + +## External DB support +To choose an external DB, just pass an environment variable to the `cargo bench` command. + +* Account Hashing Stage: `ACCOUNT_HASHING_DB=` \ No newline at end of file diff --git a/crates/stages/benches/criterion.rs b/crates/stages/benches/criterion.rs index 4556f18e5c..d9fcb100e8 100644 --- a/crates/stages/benches/criterion.rs +++ b/crates/stages/benches/criterion.rs @@ -2,18 +2,36 @@ use criterion::{ async_executor::FuturesExecutor, criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, }; +use pprof::criterion::{Output, PProfProfiler}; use reth_db::mdbx::{Env, WriteMap}; -use reth_primitives::H256; use reth_stages::{ stages::{SenderRecoveryStage, TotalDifficultyStage, TransactionLookupStage}, test_utils::TestTransaction, ExecInput, Stage, StageId, UnwindInput, }; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; -criterion_group!(benches, tx_lookup, senders, total_difficulty); +mod setup; + +criterion_group!
{ + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets = transaction_lookup, account_hashing, senders, total_difficulty +} criterion_main!(benches); +fn account_hashing(c: &mut Criterion) { + let mut group = c.benchmark_group("Stages"); + + // don't need to run each stage for that many times + group.sample_size(10); + + let num_blocks = 10_000; + let (path, stage, execution_range) = setup::prepare_account_hashing(num_blocks); + + measure_stage_with_path(&mut group, stage, path, "AccountHashing".to_string(), execution_range); +} + fn senders(c: &mut Criterion) { let mut group = c.benchmark_group("Stages"); @@ -29,7 +47,7 @@ fn senders(c: &mut Criterion) { } } -fn tx_lookup(c: &mut Criterion) { +fn transaction_lookup(c: &mut Criterion) { let mut group = c.benchmark_group("Stages"); // don't need to run each stage for that many times @@ -52,17 +70,15 @@ fn total_difficulty(c: &mut Criterion) { measure_stage(&mut group, stage, num_blocks, "TotalDifficulty".to_string()); } -fn measure_stage>>( +fn measure_stage_with_path>>( group: &mut BenchmarkGroup, stage: S, - num_blocks: u64, + path: PathBuf, label: String, + stage_range: (ExecInput, UnwindInput), ) { - let path = txs_testdata(num_blocks as usize); let tx = TestTransaction::new(&path); - - let mut input = ExecInput::default(); - input.previous_stage = Some((StageId("Another"), num_blocks)); + let (input, unwind) = stage_range; group.bench_function(label, move |b| { b.to_async(FuturesExecutor).iter_with_setup( @@ -73,7 +89,16 @@ fn measure_stage>>( let mut db_tx = tx.inner(); // Clear previous run - stage.unwind(&mut db_tx, UnwindInput::default()).await.unwrap(); + stage + .unwind(&mut db_tx, unwind) + .await + .map_err(|e| { + eyre::eyre!(format!( + "{e}\nMake sure your test database at `{}` isn't too old and incompatible with newer stage changes.", + path.display() + )) + }) + .unwrap(); db_tx.commit().unwrap(); }); @@ -88,38 +113,25 @@ fn 
measure_stage>>( }); } -use reth_interfaces::test_utils::generators::random_block_range; +fn measure_stage>>( + group: &mut BenchmarkGroup, + stage: S, + num_blocks: u64, + label: String, +) { + let path = setup::txs_testdata(num_blocks as usize); -// Helper for generating testdata for the sender recovery stage and tx lookup stages (512MB). -// Returns the path to the database file and the number of blocks written. -fn txs_testdata(num_blocks: usize) -> PathBuf { - let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); - let txs_range = 100..150; - - if !path.exists() { - // create the dirs - std::fs::create_dir_all(&path).unwrap(); - println!("Transactions testdata not found, generating to {:?}", path.display()); - let tx = TestTransaction::new(&path); - - // This takes a while because it does sig recovery internally - let blocks = random_block_range(0..num_blocks as u64 + 1, H256::zero(), txs_range); - - // insert all blocks - tx.insert_blocks(blocks.iter(), None).unwrap(); - - // // initialize TD - use reth_db::{ - cursor::DbCursorRO, - tables, - transaction::{DbTx, DbTxMut}, - }; - tx.commit(|tx| { - let (head, _) = tx.cursor_read::()?.first()?.unwrap_or_default(); - tx.put::(head, reth_primitives::U256::from(0).into()) - }) - .unwrap(); - } - - path + measure_stage_with_path( + group, + stage, + path, + label, + ( + ExecInput { + previous_stage: Some((StageId("Another"), num_blocks)), + ..Default::default() + }, + UnwindInput::default(), + ), + ) } diff --git a/crates/stages/benches/setup/account_hashing.rs b/crates/stages/benches/setup/account_hashing.rs new file mode 100644 index 0000000000..ce6f152a87 --- /dev/null +++ b/crates/stages/benches/setup/account_hashing.rs @@ -0,0 +1,85 @@ +use super::constants; +use reth_db::{ + cursor::DbCursorRO, database::Database, tables, transaction::DbTx, Error as DbError, +}; +use reth_stages::{ + stages::{AccountHashingStage, SeedOpts}, + test_utils::TestTransaction, + ExecInput, StageId, 
UnwindInput, +}; +use std::path::{Path, PathBuf}; + +/// Prepares a database for [`AccountHashingStage`] +/// If the environment variable [`constants::ACCOUNT_HASHING_DB`] is set, it will use that one and +/// will get the stage execution range from [`tables::BlockTransitionIndex`]. Otherwise, it will +/// generate its own random data. +/// +/// Returns the path to the database file, stage and range of stage execution if it exists. +pub fn prepare_account_hashing( + num_blocks: u64, +) -> (PathBuf, AccountHashingStage, (ExecInput, UnwindInput)) { + let (path, stage_range) = match std::env::var(constants::ACCOUNT_HASHING_DB) { + Ok(db) => { + let path = Path::new(&db).to_path_buf(); + let range = find_stage_range(&path); + (path, range) + } + Err(_) => generate_testdata_db(num_blocks), + }; + + (path, AccountHashingStage::default(), stage_range) +} + +fn find_stage_range(db: &PathBuf) -> (ExecInput, UnwindInput) { + let mut stage_range = None; + TestTransaction::new(db) + .tx + .view(|tx| { + let mut cursor = tx.cursor_read::()?; + let from = cursor.first()?.unwrap().0; + let to = cursor.last()?.unwrap().0; + + stage_range = Some(( + ExecInput { + previous_stage: Some((StageId("Another"), to)), + stage_progress: Some(from), + }, + UnwindInput { unwind_to: from, stage_progress: to, bad_block: None }, + )); + Ok::<(), DbError>(()) + }) + .unwrap() + .unwrap(); + + stage_range.expect("Could not find the stage range from the external DB.") +} + +fn generate_testdata_db(num_blocks: u64) -> (PathBuf, (ExecInput, UnwindInput)) { + let opts = SeedOpts { + blocks: 0..num_blocks + 1, + accounts: 0..10_000, + txs: 100..150, + transitions: 10_000 + 1, + }; + + let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("account-hashing-bench"); + + if !path.exists() { + // create the dirs + std::fs::create_dir_all(&path).unwrap(); + println!("Account Hashing testdata not found, generating to {:?}", path.display()); + let tx = TestTransaction::new(&path); + let mut tx 
= tx.inner(); + let _accounts = AccountHashingStage::seed(&mut tx, opts); + } + ( + path, + ( + ExecInput { + previous_stage: Some((StageId("Another"), num_blocks)), + ..Default::default() + }, + UnwindInput::default(), + ), + ) +} diff --git a/crates/stages/benches/setup/constants.rs b/crates/stages/benches/setup/constants.rs new file mode 100644 index 0000000000..17b3f711de --- /dev/null +++ b/crates/stages/benches/setup/constants.rs @@ -0,0 +1 @@ +pub const ACCOUNT_HASHING_DB: &str = "ACCOUNT_HASHING_DB"; diff --git a/crates/stages/benches/setup/mod.rs b/crates/stages/benches/setup/mod.rs new file mode 100644 index 0000000000..7f7932bfed --- /dev/null +++ b/crates/stages/benches/setup/mod.rs @@ -0,0 +1,43 @@ +use reth_db::{ + cursor::DbCursorRO, + tables, + transaction::{DbTx, DbTxMut}, +}; +use reth_interfaces::test_utils::generators::random_block_range; +use reth_primitives::H256; +use reth_stages::test_utils::TestTransaction; +use std::path::{Path, PathBuf}; + +mod constants; + +mod account_hashing; +pub use account_hashing::*; + +// Helper for generating testdata for the sender recovery stage and tx lookup stages (512MB). +// Returns the path to the database file and the number of blocks written. 
+pub fn txs_testdata(num_blocks: usize) -> PathBuf { + let path = Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata").join("txs-bench"); + let txs_range = 100..150; + + if !path.exists() { + // create the dirs + std::fs::create_dir_all(&path).unwrap(); + println!("Transactions testdata not found, generating to {:?}", path.display()); + let tx = TestTransaction::new(&path); + + // This takes a while because it does sig recovery internally + let blocks = random_block_range(0..num_blocks as u64 + 1, H256::zero(), txs_range); + + // insert all blocks + tx.insert_blocks(blocks.iter(), None).unwrap(); + + // initialize TD + tx.commit(|tx| { + let (head, _) = tx.cursor_read::()?.first()?.unwrap_or_default(); + tx.put::(head, reth_primitives::U256::from(0).into()) + }) + .unwrap(); + } + + path +} diff --git a/crates/stages/src/stages/hashing_account.rs b/crates/stages/src/stages/hashing_account.rs index 8fba0a2eaa..ffba350c87 100644 --- a/crates/stages/src/stages/hashing_account.rs +++ b/crates/stages/src/stages/hashing_account.rs @@ -10,6 +10,7 @@ use reth_provider::Transaction; use std::{ collections::{BTreeMap, BTreeSet}, fmt::Debug, + ops::Range, }; use tracing::*; @@ -18,7 +19,7 @@ pub const ACCOUNT_HASHING: StageId = StageId("AccountHashing"); /// Account hashing stage hashes plain account. /// This is preparation before generating intermediate hashes and calculating Merkle tree root. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct AccountHashingStage { /// The threshold (in number of state transitions) for switching between incremental /// hashing and full storage hashing. @@ -33,6 +34,88 @@ impl Default for AccountHashingStage { } } +#[derive(Clone, Debug)] +/// `SeedOpts` provides configuration parameters for calling `AccountHashingStage::seed` +/// in unit tests or benchmarks to generate an initial database state for running the +/// stage. 
+/// +/// In order to check the "full hashing" mode of the stage you want to generate more +/// transitions than `AccountHashingStage.clean_threshold`. This requires: +/// 1. Creating enough blocks + transactions so there's enough transactions to generate +/// the required transition keys in the `BlockTransitionIndex` (which depends on the +/// `TxTransitionIndex` internally) +/// 2. Setting `transitions > clean_threshold` so that there's enough diffs to actually +/// take the 2nd codepath +pub struct SeedOpts { + /// The range of blocks to be generated + pub blocks: Range, + /// The range of accounts to be generated + pub accounts: Range, + /// The range of transactions to be generated per block. + pub txs: Range, + /// The number of transitions to go back, capped at the number of total txs + pub transitions: u64, +} + +#[cfg(any(test, feature = "test-utils"))] +impl AccountHashingStage { + /// Initializes the `PlainAccountState` table with `num_accounts` having some random state + /// at the target block, with `txs_range` transactions in each block. + /// + /// Proceeds to go to the `BlockTransitionIndex` end, go back `transitions` and change the + /// account state in the `AccountChangeSet` table. 
+ pub fn seed( + tx: &mut Transaction<'_, DB>, + opts: SeedOpts, + ) -> Result, StageError> { + use reth_db::models::AccountBeforeTx; + use reth_interfaces::test_utils::generators::{ + random_block_range, random_eoa_account_range, + }; + use reth_primitives::{H256, U256}; + use reth_provider::insert_canonical_block; + + let blocks = random_block_range(opts.blocks, H256::zero(), opts.txs); + let num_transitions = blocks.iter().map(|b| b.body.len() as u64).sum(); + let transitions = std::cmp::min(opts.transitions, num_transitions); + + for block in blocks { + insert_canonical_block(&**tx, &block, true).unwrap(); + } + let mut accounts = random_eoa_account_range(opts.accounts); + { + // Account State generator + let mut account_cursor = tx.cursor_write::()?; + accounts.sort_by(|a, b| a.0.cmp(&b.0)); + for (addr, acc) in accounts.iter() { + account_cursor.append(*addr, *acc)?; + } + + // seed account changeset + let (_, last_transition) = + tx.cursor_read::()?.last()?.unwrap(); + + let first_transition = last_transition.checked_sub(transitions).unwrap_or_default(); + + let mut acc_changeset_cursor = tx.cursor_write::()?; + for (t, (addr, acc)) in (first_transition..last_transition).zip(&accounts) { + let Account { nonce, balance, .. 
} = acc; + let prev_acc = Account { + nonce: nonce - 1, + balance: balance - U256::from(1), + bytecode_hash: None, + }; + let acc_before_tx = AccountBeforeTx { address: *addr, info: Some(prev_acc) }; + acc_changeset_cursor.append(t, acc_before_tx)?; + } + } + + tx.commit()?; + + Ok(accounts) + } +} + #[async_trait::async_trait] impl Stage for AccountHashingStage { /// Return the id of the stage @@ -73,14 +156,21 @@ impl Stage for AccountHashingStage { .map(|res| res.map(|(address, account)| (keccak256(address), account))) .collect::, _>>()?; - // next key of iterator - let next_key = accounts.next()?; + let mut hashed_account_cursor = tx.cursor_write::()?; // iterate and put presorted hashed accounts - hashed_batch - .into_iter() - .try_for_each(|(k, v)| tx.put::(k, v))?; - next_key + if first_key.is_none() { + hashed_batch + .into_iter() + .try_for_each(|(k, v)| hashed_account_cursor.append(k, v))?; + } else { + hashed_batch + .into_iter() + .try_for_each(|(k, v)| hashed_account_cursor.insert(k, v))?; + } + + // next key of iterator + accounts.next()? 
}; tx.commit()?; if let Some((next_key, _)) = next_key { @@ -184,9 +274,7 @@ mod tests { PREV_STAGE_ID, }; use assert_matches::assert_matches; - use reth_interfaces::test_utils::generators::random_block_range; - use reth_primitives::{Account, SealedBlock, H256, U256}; - use reth_provider::insert_canonical_block; + use reth_primitives::{Account, U256}; use test_utils::*; stage_test_suite_ext!(AccountHashingTestRunner, account_hashing); @@ -221,13 +309,7 @@ mod tests { test_utils::{StageTestRunner, TestTransaction}, ExecInput, ExecOutput, UnwindInput, }; - use reth_db::{ - cursor::DbCursorRO, - models::AccountBeforeTx, - tables, - transaction::{DbTx, DbTxMut}, - }; - use reth_interfaces::test_utils::generators::random_eoa_account_range; + use reth_db::{cursor::DbCursorRO, tables, transaction::DbTx}; pub(crate) struct AccountHashingTestRunner { pub(crate) tx: TestTransaction, @@ -245,34 +327,6 @@ mod tests { self.commit_threshold = threshold; } - pub(crate) fn insert_blocks( - &self, - blocks: Vec, - ) -> Result<(), TestRunnerError> { - for block in blocks.iter() { - self.tx.commit(|tx| { - insert_canonical_block(tx, block, true).unwrap(); - Ok(()) - })?; - } - - Ok(()) - } - - pub(crate) fn insert_accounts( - &self, - accounts: &[(Address, Account)], - ) -> Result<(), TestRunnerError> { - for (addr, acc) in accounts.iter() { - self.tx.commit(|tx| { - tx.put::(*addr, *acc)?; - Ok(()) - })?; - } - - Ok(()) - } - /// Iterates over PlainAccount table and checks that the accounts match the ones /// in the HashedAccount table pub(crate) fn check_hashed_accounts(&self) -> Result<(), TestRunnerError> { @@ -348,41 +402,16 @@ mod tests { type Seed = Vec<(Address, Account)>; fn seed_execution(&mut self, input: ExecInput) -> Result { - let end = input.previous_stage_progress() + 1; - - let blocks = random_block_range(0..end, H256::zero(), 0..3); - self.insert_blocks(blocks)?; - - let n_accounts = 2; - let accounts = random_eoa_account_range(&mut (0..n_accounts)); - 
self.insert_accounts(&accounts)?; - - // seed account changeset - self.tx - .commit(|tx| { - let (_, last_transition) = - tx.cursor_read::()?.last()?.unwrap(); - - let first_transition = - last_transition.checked_sub(n_accounts).unwrap_or_default(); - - for (t, (addr, acc)) in (first_transition..last_transition).zip(&accounts) { - let Account { nonce, balance, .. } = acc; - let prev_acc = Account { - nonce: nonce - 1, - balance: balance - U256::from(1), - bytecode_hash: None, - }; - let acc_before_tx = - AccountBeforeTx { address: *addr, info: Some(prev_acc) }; - tx.put::(t, acc_before_tx)?; - } - - Ok(()) - }) - .unwrap(); - - Ok(accounts) + Ok(AccountHashingStage::seed( + &mut self.tx.inner(), + SeedOpts { + blocks: 0..input.previous_stage_progress() + 1, + accounts: 0..2, + txs: 0..3, + transitions: 2, + }, + ) + .unwrap()) } fn validate_execution(