From 173bcf455a9b9a5da03ef96ab027e455a0a837c3 Mon Sep 17 00:00:00 2001 From: Dan Cline <6798349+Rjected@users.noreply.github.com> Date: Thu, 18 Dec 2025 20:43:41 -0500 Subject: [PATCH] wip: vibed trie cache --- Cargo.lock | 9 +++++ Cargo.toml | 1 + crates/trie/sparse-parallel/src/trie.rs | 16 +++++---- crates/trie/sparse/Cargo.toml | 2 ++ crates/trie/sparse/src/hash_cache.rs | 47 +++++++++++++++++++++++++ crates/trie/sparse/src/lib.rs | 3 ++ crates/trie/sparse/src/trie.rs | 18 ++++++---- 7 files changed, 82 insertions(+), 14 deletions(-) create mode 100644 crates/trie/sparse/src/hash_cache.rs diff --git a/Cargo.lock b/Cargo.lock index 2a7fbe0be0..f29736794b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3986,6 +3986,14 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "fixed-cache" +version = "0.1.2" +source = "git+https://github.com/danipopes/fixed-cache#2324460d2b7c43f7665c37f21e6b3aab6298a1f0" +dependencies = [ + "equivalent", +] + [[package]] name = "fixed-hash" version = "0.8.0" @@ -11029,6 +11037,7 @@ dependencies = [ "assert_matches", "auto_impl", "codspeed-criterion-compat", + "fixed-cache", "itertools 0.14.0", "metrics", "pretty_assertions", diff --git a/Cargo.toml b/Cargo.toml index 8f12a0fb7d..5bbbe67074 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -482,6 +482,7 @@ revm-interpreter = { version = "31.1.0", default-features = false } revm-database-interface = { version = "8.0.5", default-features = false } op-revm = { version = "14.1.0", default-features = false } revm-inspectors = "0.33.2" +fixed-cache = { git = "https://github.com/danipopes/fixed-cache" } # eth alloy-chains = { version = "0.2.5", default-features = false } diff --git a/crates/trie/sparse-parallel/src/trie.rs b/crates/trie/sparse-parallel/src/trie.rs index 3ccc5aad1a..c8e24a873f 100644 --- a/crates/trie/sparse-parallel/src/trie.rs +++ b/crates/trie/sparse-parallel/src/trie.rs @@ -4,7 +4,7 @@ use alloy_primitives::{ map::{Entry, HashMap}, B256, }; -use alloy_rlp::Decodable; 
+use alloy_rlp::{Decodable, Encodable}; use alloy_trie::{BranchNodeCompact, TrieMask, EMPTY_ROOT_HASH}; use reth_execution_errors::{SparseTrieErrorKind, SparseTrieResult}; use reth_trie_common::{ @@ -14,8 +14,8 @@ use reth_trie_common::{ }; use reth_trie_sparse::{ provider::{RevealedNode, TrieNodeProvider}, - LeafLookup, LeafLookupError, RlpNodeStackItem, SparseNode, SparseNodeType, SparseTrieInterface, - SparseTrieUpdates, + rlp_node_from_rlp_cached, LeafLookup, LeafLookupError, RlpNodeStackItem, SparseNode, + SparseNodeType, SparseTrieInterface, SparseTrieUpdates, }; use smallvec::SmallVec; use std::cmp::{Ord, Ordering, PartialOrd}; @@ -2206,7 +2206,8 @@ impl SparseSubtrieInner { // Encode the leaf node and update its hash let value = self.values.get(&path).unwrap(); self.buffers.rlp_buf.clear(); - let rlp_node = LeafNodeRef { key, value }.rlp(&mut self.buffers.rlp_buf); + LeafNodeRef { key, value }.encode(&mut self.buffers.rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(&self.buffers.rlp_buf); *hash = rlp_node.as_hash(); (rlp_node, SparseNodeType::Leaf) } @@ -2229,8 +2230,8 @@ impl SparseSubtrieInner { let RlpNodeStackItem { path: _, rlp_node: child, node_type: child_node_type } = self.buffers.rlp_node_stack.pop().unwrap(); self.buffers.rlp_buf.clear(); - let rlp_node = - ExtensionNodeRef::new(key, &child).rlp(&mut self.buffers.rlp_buf); + ExtensionNodeRef::new(key, &child).encode(&mut self.buffers.rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(&self.buffers.rlp_buf); *hash = rlp_node.as_hash(); let store_in_db_trie_value = child_node_type.store_in_db_trie(); @@ -2387,7 +2388,8 @@ impl SparseSubtrieInner { self.buffers.rlp_buf.clear(); let branch_node_ref = BranchNodeRef::new(&self.buffers.branch_value_stack_buf, *state_mask); - let rlp_node = branch_node_ref.rlp(&mut self.buffers.rlp_buf); + branch_node_ref.encode(&mut self.buffers.rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(&self.buffers.rlp_buf); *hash = rlp_node.as_hash(); // Save a branch 
node update only if it's not a root node, and we need to diff --git a/crates/trie/sparse/Cargo.toml b/crates/trie/sparse/Cargo.toml index b2c7ee0f56..26f4c7e0b1 100644 --- a/crates/trie/sparse/Cargo.toml +++ b/crates/trie/sparse/Cargo.toml @@ -27,6 +27,7 @@ alloy-rlp.workspace = true auto_impl.workspace = true rayon = { workspace = true, optional = true } smallvec = { workspace = true, features = ["const_new"] } +fixed-cache = { workspace = true, optional = true } # metrics reth-metrics = { workspace = true, optional = true } @@ -59,6 +60,7 @@ std = [ "alloy-primitives/std", "alloy-rlp/std", "alloy-trie/std", + "dep:fixed-cache", "reth-execution-errors/std", "reth-primitives-traits/std", "reth-storage-api/std", diff --git a/crates/trie/sparse/src/hash_cache.rs b/crates/trie/sparse/src/hash_cache.rs new file mode 100644 index 0000000000..2fd31dbbbd --- /dev/null +++ b/crates/trie/sparse/src/hash_cache.rs @@ -0,0 +1,47 @@ +use alloy_primitives::{keccak256, B256}; +use reth_trie_common::RlpNode; + +#[cfg(feature = "std")] +use fixed_cache::Cache; +#[cfg(feature = "std")] +use std::sync::OnceLock; + +// ~1M entries targets ~128 MiB total usage (key vecs + hashes + bucket overhead) while providing +// good reuse for repeated trie node RLPs during sparse trie hashing. +const TRIE_NODE_HASH_CACHE_SIZE: usize = 1 << 20; + +#[cfg(feature = "std")] +fn trie_node_hash_cache() -> &'static Cache<Vec<u8>, B256> { + static CACHE: OnceLock<Cache<Vec<u8>, B256>> = OnceLock::new(); + CACHE.get_or_init(|| Cache::new(TRIE_NODE_HASH_CACHE_SIZE, Default::default())) +} + +/// Hashes an RLP-encoded trie node with a fixed-size cache. 
+pub fn hash_trie_node_cached(rlp: &[u8]) -> B256 { + #[cfg(feature = "std")] + { + let cache = trie_node_hash_cache(); + if let Some(hash) = cache.get(rlp) { + return hash; + } + + let hash = keccak256(rlp); + cache.insert(rlp.to_vec(), hash); + return hash; + } + + #[cfg(not(feature = "std"))] + { + keccak256(rlp) + } +} + +/// Returns `rlp(node)` for short encodings or `rlp(keccak(rlp(node)))` using the cache. +pub fn rlp_node_from_rlp_cached(rlp: &[u8]) -> RlpNode { + if rlp.len() < 32 { + RlpNode::from_raw(rlp).expect("RLP node length already checked") + } else { + let hash = hash_trie_node_cached(rlp); + RlpNode::word_rlp(&hash) + } +} diff --git a/crates/trie/sparse/src/lib.rs b/crates/trie/sparse/src/lib.rs index 6b17597048..5e3debc97e 100644 --- a/crates/trie/sparse/src/lib.rs +++ b/crates/trie/sparse/src/lib.rs @@ -16,6 +16,9 @@ pub use traits::*; pub mod provider; +mod hash_cache; +pub use hash_cache::{hash_trie_node_cached, rlp_node_from_rlp_cached}; + #[cfg(feature = "metrics")] mod metrics; diff --git a/crates/trie/sparse/src/trie.rs b/crates/trie/sparse/src/trie.rs index acad15bc15..aa14a1583d 100644 --- a/crates/trie/sparse/src/trie.rs +++ b/crates/trie/sparse/src/trie.rs @@ -1,6 +1,7 @@ use crate::{ + hash_trie_node_cached, provider::{RevealedNode, TrieNodeProvider}, - LeafLookup, LeafLookupError, SparseTrieInterface, SparseTrieUpdates, + rlp_node_from_rlp_cached, LeafLookup, LeafLookupError, SparseTrieInterface, SparseTrieUpdates, }; use alloc::{ borrow::Cow, @@ -11,11 +12,11 @@ use alloc::{ vec::Vec, }; use alloy_primitives::{ - hex, keccak256, + hex, map::{Entry, HashMap, HashSet}, B256, }; -use alloy_rlp::Decodable; +use alloy_rlp::{Decodable, Encodable}; use reth_execution_errors::{SparseTrieErrorKind, SparseTrieResult}; use reth_trie_common::{ prefix_set::{PrefixSet, PrefixSetMut}, @@ -928,7 +929,7 @@ impl SparseTrieInterface for SerialSparseTrie { if let Some(root_hash) = rlp_node.as_hash() { root_hash } else { - keccak256(rlp_node) + 
hash_trie_node_cached(rlp_node.as_ref()) } } @@ -1532,7 +1533,8 @@ impl SerialSparseTrie { } else { let value = self.values.get(&path).unwrap(); rlp_buf.clear(); - let rlp_node = LeafNodeRef { key, value }.rlp(rlp_buf); + LeafNodeRef { key, value }.encode(rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(rlp_buf); *hash = rlp_node.as_hash(); (rlp_node, SparseNodeType::Leaf) } @@ -1554,7 +1556,8 @@ impl SerialSparseTrie { node_type: child_node_type, } = buffers.rlp_node_stack.pop().unwrap(); rlp_buf.clear(); - let rlp_node = ExtensionNodeRef::new(key, &child).rlp(rlp_buf); + ExtensionNodeRef::new(key, &child).encode(rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(rlp_buf); *hash = rlp_node.as_hash(); let store_in_db_trie_value = child_node_type.store_in_db_trie(); @@ -1708,7 +1711,8 @@ impl SerialSparseTrie { rlp_buf.clear(); let branch_node_ref = BranchNodeRef::new(&buffers.branch_value_stack_buf, *state_mask); - let rlp_node = branch_node_ref.rlp(rlp_buf); + branch_node_ref.encode(rlp_buf); + let rlp_node = rlp_node_from_rlp_cached(rlp_buf); *hash = rlp_node.as_hash(); // Save a branch node update only if it's not a root node, and we need to