diff --git a/crates/trie/common/src/proofs.rs b/crates/trie/common/src/proofs.rs index f2d75b2d3f..c3eab92011 100644 --- a/crates/trie/common/src/proofs.rs +++ b/crates/trie/common/src/proofs.rs @@ -1,6 +1,6 @@ //! Merkle trie proofs. -use crate::{BranchNodeMasksMap, Nibbles, TrieAccount}; +use crate::{BranchNodeMasksMap, Nibbles, ProofTrieNode, TrieAccount}; use alloc::{borrow::Cow, vec::Vec}; use alloy_consensus::constants::KECCAK_EMPTY; use alloy_primitives::{ @@ -431,6 +431,33 @@ impl TryFrom for DecodedMultiProof { } } +/// V2 decoded multiproof which contains the results of both account and storage V2 proof +/// calculations. +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct DecodedMultiProofV2 { + /// Account trie proof nodes + pub account_proofs: Vec, + /// Storage trie proof nodes indexed by account + pub storage_proofs: B256Map>, +} + +impl DecodedMultiProofV2 { + /// Returns true if there are no proofs + pub fn is_empty(&self) -> bool { + self.account_proofs.is_empty() && self.storage_proofs.is_empty() + } + + /// Appends the given multiproof's data to this one. + /// + /// This implementation does not deduplicate redundant proofs. + pub fn extend(&mut self, other: Self) { + self.account_proofs.extend(other.account_proofs); + for (hashed_address, other_storage_proofs) in other.storage_proofs { + self.storage_proofs.entry(hashed_address).or_default().extend(other_storage_proofs); + } + } +} + /// The merkle multiproof of storage trie. #[derive(Clone, Debug, PartialEq, Eq)] pub struct StorageMultiProof { diff --git a/crates/trie/parallel/src/lib.rs b/crates/trie/parallel/src/lib.rs index d713ce1520..ba88ab690d 100644 --- a/crates/trie/parallel/src/lib.rs +++ b/crates/trie/parallel/src/lib.rs @@ -22,6 +22,9 @@ pub mod proof; pub mod proof_task; +/// V2 multiproof targets and chunking. +pub mod targets_v2; + /// Parallel state root metrics. #[cfg(feature = "metrics")] pub mod metrics; diff --git a/crates/trie/parallel/src/targets_v2.rs b/crates/trie/parallel/src/targets_v2.rs new file mode 100644 index 0000000000..bda7f0c31f --- /dev/null +++ b/crates/trie/parallel/src/targets_v2.rs @@ -0,0 +1,148 @@ +//! V2 multiproof targets and chunking. + +use alloy_primitives::{map::B256Map, B256}; +use reth_trie::proof_v2; + +/// A set of account and storage V2 proof targets. The account and storage targets do not need to +/// necessarily overlap. +#[derive(Debug, Default)] +pub struct MultiProofTargetsV2 { + /// The set of account proof targets to generate proofs for. + pub account_targets: Vec, + /// The sets of storage proof targets to generate proofs for. + pub storage_targets: B256Map>, +} + +impl MultiProofTargetsV2 { + /// Returns true is there are no account or storage targets. + pub fn is_empty(&self) -> bool { + self.account_targets.is_empty() && self.storage_targets.is_empty() + } +} + +/// An iterator that yields chunks of V2 proof targets of at most `size` account and storage +/// targets. +/// +/// Unlike legacy chunking, V2 preserves account targets exactly as they were (with their `min_len` +/// metadata). Account targets must appear in a chunk. Storage targets for those accounts are +/// chunked together, but if they exceed the chunk size, subsequent chunks contain only the +/// remaining storage targets without repeating the account target. +#[derive(Debug)] +pub struct ChunkedMultiProofTargetsV2 { + /// Remaining account targets to process + account_targets: std::vec::IntoIter, + /// Storage targets by account address + storage_targets: B256Map>, + /// Current account being processed (if any storage slots remain) + current_account_storage: Option<(B256, std::vec::IntoIter)>, + /// Chunk size + size: usize, +} + +impl ChunkedMultiProofTargetsV2 { + /// Creates a new chunked iterator for the given targets. + pub fn new(targets: MultiProofTargetsV2, size: usize) -> Self { + Self { + account_targets: targets.account_targets.into_iter(), + storage_targets: targets.storage_targets, + current_account_storage: None, + size, + } + } +} + +impl Iterator for ChunkedMultiProofTargetsV2 { + type Item = MultiProofTargetsV2; + + fn next(&mut self) -> Option { + let mut chunk = MultiProofTargetsV2::default(); + let mut count = 0; + + // First, finish any remaining storage slots from previous account + if let Some((account_addr, ref mut storage_iter)) = self.current_account_storage { + let remaining_capacity = self.size - count; + let slots: Vec<_> = storage_iter.by_ref().take(remaining_capacity).collect(); + + count += slots.len(); + chunk.storage_targets.insert(account_addr, slots); + + // If iterator is exhausted, clear current_account_storage + if storage_iter.len() == 0 { + self.current_account_storage = None; + } + } + + // Process account targets and their storage + while count < self.size { + let Some(account_target) = self.account_targets.next() else { + break; + }; + + // Add the account target + chunk.account_targets.push(account_target); + count += 1; + + // Check if this account has storage targets + let account_addr = account_target.key(); + if let Some(storage_slots) = self.storage_targets.remove(&account_addr) { + let remaining_capacity = self.size - count; + + if storage_slots.len() <= remaining_capacity { + // Optimization: We can take all slots, just move the vec + count += storage_slots.len(); + chunk.storage_targets.insert(account_addr, storage_slots); + } else { + // We need to split the storage slots + let mut storage_iter = storage_slots.into_iter(); + let slots_in_chunk: Vec<_> = + storage_iter.by_ref().take(remaining_capacity).collect(); + count += slots_in_chunk.len(); + + chunk.storage_targets.insert(account_addr, slots_in_chunk); + + // Save remaining storage slots for next chunk + self.current_account_storage = Some((account_addr, storage_iter)); + break; + } + } + } + + // Process any remaining storage-only entries (accounts not in account_targets) + while let Some((account_addr, storage_slots)) = self.storage_targets.iter_mut().next() && + count < self.size + { + let account_addr = *account_addr; + let storage_slots = std::mem::take(storage_slots); + let remaining_capacity = self.size - count; + + // Always remove from the map - if there are remaining slots they go to + // current_account_storage + self.storage_targets.remove(&account_addr); + + if storage_slots.len() <= remaining_capacity { + // Optimization: We can take all slots, just move the vec + count += storage_slots.len(); + chunk.storage_targets.insert(account_addr, storage_slots); + } else { + // We need to split the storage slots + let mut storage_iter = storage_slots.into_iter(); + let slots_in_chunk: Vec<_> = + storage_iter.by_ref().take(remaining_capacity).collect(); + + chunk.storage_targets.insert(account_addr, slots_in_chunk); + + // Save remaining storage slots for next chunk + if storage_iter.len() > 0 { + self.current_account_storage = Some((account_addr, storage_iter)); + } + break; + } + } + + if chunk.account_targets.is_empty() && chunk.storage_targets.is_empty() { + None + } else { + Some(chunk) + } + } +} diff --git a/crates/trie/sparse/src/state.rs b/crates/trie/sparse/src/state.rs index a1c6ac4e9f..415938915a 100644 --- a/crates/trie/sparse/src/state.rs +++ b/crates/trie/sparse/src/state.rs @@ -316,6 +316,86 @@ where } } + /// Reveals a V2 decoded multiproof. + /// + /// V2 multiproofs use a simpler format where proof nodes are stored as vectors rather than + /// hashmaps, with masks already included in the `ProofTrieNode` structure. + #[instrument( + skip_all, + fields( + account_nodes = multiproof.account_proofs.len(), + storages = multiproof.storage_proofs.len() + ) + )] + pub fn reveal_decoded_multiproof_v2( + &mut self, + multiproof: reth_trie_common::DecodedMultiProofV2, + ) -> SparseStateTrieResult<()> { + // Reveal the account proof nodes + self.reveal_account_v2_proof_nodes(multiproof.account_proofs)?; + + #[cfg(not(feature = "std"))] + // If nostd then serially reveal storage proof nodes for each storage trie + { + for (account, storage_proofs) in multiproof.storage_proofs { + self.reveal_storage_v2_proof_nodes(account, storage_proofs)?; + } + + Ok(()) + } + + #[cfg(feature = "std")] + // If std then reveal storage proofs in parallel + { + use rayon::iter::{ParallelBridge, ParallelIterator}; + + let retain_updates = self.retain_updates; + + // Process all storage trie revealings in parallel, having first removed the + // `reveal_nodes` tracking and `SparseTrie`s for each account from their HashMaps. + // These will be returned after processing. + let results: Vec<_> = multiproof + .storage_proofs + .into_iter() + .map(|(account, storage_proofs)| { + let revealed_nodes = self.storage.take_or_create_revealed_paths(&account); + let trie = self.storage.take_or_create_trie(&account); + (account, storage_proofs, revealed_nodes, trie) + }) + .par_bridge() + .map(|(account, storage_proofs, mut revealed_nodes, mut trie)| { + let result = Self::reveal_storage_v2_proof_nodes_inner( + account, + storage_proofs, + &mut revealed_nodes, + &mut trie, + retain_updates, + ); + (account, result, revealed_nodes, trie) + }) + .collect(); + + let mut any_err = Ok(()); + for (account, result, revealed_nodes, trie) in results { + self.storage.revealed_paths.insert(account, revealed_nodes); + self.storage.tries.insert(account, trie); + if let Ok(_metric_values) = result { + #[cfg(feature = "metrics")] + { + self.metrics + .increment_total_storage_nodes(_metric_values.total_nodes as u64); + self.metrics + .increment_skipped_storage_nodes(_metric_values.skipped_nodes as u64); + } + } else { + any_err = result.map(|_| ()); + } + } + + any_err + } + } + /// Reveals an account multiproof. pub fn reveal_account_multiproof( &mut self, @@ -362,6 +442,89 @@ where Ok(()) } + /// Reveals account proof nodes from a V2 proof. + /// + /// V2 proofs already include the masks in the `ProofTrieNode` structure, + /// so no separate masks map is needed. + pub fn reveal_account_v2_proof_nodes( + &mut self, + nodes: Vec, + ) -> SparseStateTrieResult<()> { + let FilteredV2ProofNodes { root_node, nodes, new_nodes, metric_values: _metric_values } = + filter_revealed_v2_proof_nodes(nodes, &mut self.revealed_account_paths)?; + + #[cfg(feature = "metrics")] + { + self.metrics.increment_total_account_nodes(_metric_values.total_nodes as u64); + self.metrics.increment_skipped_account_nodes(_metric_values.skipped_nodes as u64); + } + + if let Some(root_node) = root_node { + trace!(target: "trie::sparse", ?root_node, "Revealing root account node from V2 proof"); + let trie = + self.state.reveal_root(root_node.node, root_node.masks, self.retain_updates)?; + + trie.reserve_nodes(new_nodes); + + trace!(target: "trie::sparse", total_nodes = ?nodes.len(), "Revealing account nodes from V2 proof"); + trie.reveal_nodes(nodes)?; + } + + Ok(()) + } + + /// Reveals storage proof nodes from a V2 proof for the given address. + /// + /// V2 proofs already include the masks in the `ProofTrieNode` structure, + /// so no separate masks map is needed. + pub fn reveal_storage_v2_proof_nodes( + &mut self, + account: B256, + nodes: Vec, + ) -> SparseStateTrieResult<()> { + let (trie, revealed_paths) = self.storage.get_trie_and_revealed_paths_mut(account); + let _metric_values = Self::reveal_storage_v2_proof_nodes_inner( + account, + nodes, + revealed_paths, + trie, + self.retain_updates, + )?; + + #[cfg(feature = "metrics")] + { + self.metrics.increment_total_storage_nodes(_metric_values.total_nodes as u64); + self.metrics.increment_skipped_storage_nodes(_metric_values.skipped_nodes as u64); + } + + Ok(()) + } + + /// Reveals storage V2 proof nodes for the given address. This is an internal static function + /// designed to handle a variety of associated public functions. + fn reveal_storage_v2_proof_nodes_inner( + account: B256, + nodes: Vec, + revealed_nodes: &mut HashSet, + trie: &mut SparseTrie, + retain_updates: bool, + ) -> SparseStateTrieResult { + let FilteredV2ProofNodes { root_node, nodes, new_nodes, metric_values } = + filter_revealed_v2_proof_nodes(nodes, revealed_nodes)?; + + if let Some(root_node) = root_node { + trace!(target: "trie::sparse", ?account, ?root_node, "Revealing root storage node from V2 proof"); + let trie = trie.reveal_root(root_node.node, root_node.masks, retain_updates)?; + + trie.reserve_nodes(new_nodes); + + trace!(target: "trie::sparse", ?account, total_nodes = ?nodes.len(), "Revealing storage nodes from V2 proof"); + trie.reveal_nodes(nodes)?; + } + + Ok(metric_values) + } + /// Reveals a storage multiproof for the given address. pub fn reveal_storage_multiproof( &mut self, @@ -1000,6 +1163,87 @@ fn filter_map_revealed_nodes( Ok(result) } +/// Result of [`filter_revealed_v2_proof_nodes`]. +#[derive(Debug, PartialEq, Eq)] +struct FilteredV2ProofNodes { + /// Root node which was pulled out of the original node set to be handled specially. + root_node: Option, + /// Filtered proof nodes. Root node is removed. + nodes: Vec, + /// Number of new nodes that will be revealed. This includes all children of branch nodes, even + /// if they are not in the proof. + new_nodes: usize, + /// Values which are being returned so they can be incremented into metrics. + metric_values: ProofNodesMetricValues, +} + +/// Filters V2 proof nodes that are already revealed, separates the root node if present, and +/// returns additional information about the number of total, skipped, and new nodes. +/// +/// Unlike [`filter_map_revealed_nodes`], V2 proof nodes already have masks included in the +/// `ProofTrieNode` structure, so no separate masks map is needed. +fn filter_revealed_v2_proof_nodes( + proof_nodes: Vec, + revealed_nodes: &mut HashSet, +) -> SparseStateTrieResult { + let mut result = FilteredV2ProofNodes { + root_node: None, + nodes: Vec::with_capacity(proof_nodes.len()), + new_nodes: 0, + metric_values: Default::default(), + }; + + // Count non-EmptyRoot nodes for sanity check. When multiple proofs are extended together, + // duplicate EmptyRoot nodes may appear (e.g., storage proofs split across chunks for an + // account with empty storage). We only error if there's an EmptyRoot alongside real nodes. + let non_empty_root_count = + proof_nodes.iter().filter(|n| !matches!(n.node, TrieNode::EmptyRoot)).count(); + + for node in proof_nodes { + result.metric_values.total_nodes += 1; + + let is_root = node.path.is_empty(); + + // If the node is already revealed, skip it. We don't ever skip the root node, nor do we add + // it to `revealed_nodes`. + if !is_root && !revealed_nodes.insert(node.path) { + result.metric_values.skipped_nodes += 1; + continue + } + + result.new_nodes += 1; + + // Count children for capacity estimation + match &node.node { + TrieNode::Branch(branch) => { + result.new_nodes += branch.state_mask.count_ones() as usize; + } + TrieNode::Extension(_) => { + result.new_nodes += 1; + } + _ => {} + }; + + if is_root { + // Perform sanity check: EmptyRoot is only valid if there are no other real nodes. + if matches!(node.node, TrieNode::EmptyRoot) && non_empty_root_count > 0 { + return Err(SparseStateTrieErrorKind::InvalidRootNode { + path: node.path, + node: alloy_rlp::encode(&node.node).into(), + } + .into()) + } + + result.root_node = Some(node); + continue + } + + result.nodes.push(node); + } + + Ok(result) +} + #[cfg(test)] mod tests { use super::*; @@ -1174,6 +1418,127 @@ mod tests { .is_none()); } + #[test] + fn reveal_v2_proof_nodes() { + let provider_factory = DefaultTrieNodeProviderFactory; + let mut sparse = SparseStateTrie::::default(); + + let leaf_value = alloy_rlp::encode(TrieAccount::default()); + let leaf_1_node = TrieNode::Leaf(LeafNode::new(Nibbles::default(), leaf_value.clone())); + let leaf_2_node = TrieNode::Leaf(LeafNode::new(Nibbles::default(), leaf_value.clone())); + + let branch_node = TrieNode::Branch(BranchNode { + stack: vec![ + RlpNode::from_rlp(&alloy_rlp::encode(&leaf_1_node)), + RlpNode::from_rlp(&alloy_rlp::encode(&leaf_2_node)), + ], + state_mask: TrieMask::new(0b11), + }); + + // Create V2 proof nodes with masks already included + let v2_proof_nodes = vec![ + ProofTrieNode { + path: Nibbles::default(), + node: branch_node, + masks: Some(BranchNodeMasks { + hash_mask: TrieMask::default(), + tree_mask: TrieMask::default(), + }), + }, + ProofTrieNode { path: Nibbles::from_nibbles([0x0]), node: leaf_1_node, masks: None }, + ProofTrieNode { path: Nibbles::from_nibbles([0x1]), node: leaf_2_node, masks: None }, + ]; + + // Reveal V2 proof nodes + sparse.reveal_account_v2_proof_nodes(v2_proof_nodes.clone()).unwrap(); + + // Check that the state trie contains the leaf node and value + assert!(sparse + .state_trie_ref() + .unwrap() + .nodes_ref() + .contains_key(&Nibbles::from_nibbles([0x0]))); + assert_eq!( + sparse.state_trie_ref().unwrap().get_leaf_value(&Nibbles::from_nibbles([0x0])), + Some(&leaf_value) + ); + + // Remove the leaf node + sparse.remove_account_leaf(&Nibbles::from_nibbles([0x0]), &provider_factory).unwrap(); + assert!(sparse + .state_trie_ref() + .unwrap() + .get_leaf_value(&Nibbles::from_nibbles([0x0])) + .is_none()); + + // Reveal again - should skip already revealed paths + sparse.reveal_account_v2_proof_nodes(v2_proof_nodes).unwrap(); + assert!(sparse + .state_trie_ref() + .unwrap() + .get_leaf_value(&Nibbles::from_nibbles([0x0])) + .is_none()); + } + + #[test] + fn reveal_storage_v2_proof_nodes() { + let provider_factory = DefaultTrieNodeProviderFactory; + let mut sparse = SparseStateTrie::::default(); + + let storage_value: Vec = alloy_rlp::encode_fixed_size(&U256::from(42)).to_vec(); + let leaf_1_node = TrieNode::Leaf(LeafNode::new(Nibbles::default(), storage_value.clone())); + let leaf_2_node = TrieNode::Leaf(LeafNode::new(Nibbles::default(), storage_value.clone())); + + let branch_node = TrieNode::Branch(BranchNode { + stack: vec![ + RlpNode::from_rlp(&alloy_rlp::encode(&leaf_1_node)), + RlpNode::from_rlp(&alloy_rlp::encode(&leaf_2_node)), + ], + state_mask: TrieMask::new(0b11), + }); + + let v2_proof_nodes = vec![ + ProofTrieNode { path: Nibbles::default(), node: branch_node, masks: None }, + ProofTrieNode { path: Nibbles::from_nibbles([0x0]), node: leaf_1_node, masks: None }, + ProofTrieNode { path: Nibbles::from_nibbles([0x1]), node: leaf_2_node, masks: None }, + ]; + + // Reveal V2 storage proof nodes for account + sparse.reveal_storage_v2_proof_nodes(B256::ZERO, v2_proof_nodes.clone()).unwrap(); + + // Check that the storage trie contains the leaf node and value + assert!(sparse + .storage_trie_ref(&B256::ZERO) + .unwrap() + .nodes_ref() + .contains_key(&Nibbles::from_nibbles([0x0]))); + assert_eq!( + sparse + .storage_trie_ref(&B256::ZERO) + .unwrap() + .get_leaf_value(&Nibbles::from_nibbles([0x0])), + Some(&storage_value) + ); + + // Remove the leaf node + sparse + .remove_storage_leaf(B256::ZERO, &Nibbles::from_nibbles([0x0]), &provider_factory) + .unwrap(); + assert!(sparse + .storage_trie_ref(&B256::ZERO) + .unwrap() + .get_leaf_value(&Nibbles::from_nibbles([0x0])) + .is_none()); + + // Reveal again - should skip already revealed paths + sparse.reveal_storage_v2_proof_nodes(B256::ZERO, v2_proof_nodes).unwrap(); + assert!(sparse + .storage_trie_ref(&B256::ZERO) + .unwrap() + .get_leaf_value(&Nibbles::from_nibbles([0x0])) + .is_none()); + } + #[test] fn take_trie_updates() { reth_tracing::init_test_tracing(); diff --git a/crates/trie/trie/src/proof_v2/target.rs b/crates/trie/trie/src/proof_v2/target.rs index c96b202e85..a969089f3a 100644 --- a/crates/trie/trie/src/proof_v2/target.rs +++ b/crates/trie/trie/src/proof_v2/target.rs @@ -19,6 +19,11 @@ impl Target { Self { key, min_len: 0 } } + /// Returns the key the target was initialized with. + pub fn key(&self) -> B256 { + B256::from_slice(&self.key.pack()) + } + /// Only match trie nodes whose path is at least this long. /// /// # Panics