From a6366de1cd6f507ca2098b0a9f541b463e32f17b Mon Sep 17 00:00:00 2001
From: Roman Krasiuk
Date: Mon, 10 Apr 2023 19:18:19 +0300
Subject: [PATCH] feat(trie): trie nodes (#2174)

---
 Cargo.lock                         |  5 ++
 crates/trie/Cargo.toml             | 10 ++++
 crates/trie/src/lib.rs             |  3 ++
 crates/trie/src/nodes/branch.rs    | 73 +++++++++++++++++++++++++++
 crates/trie/src/nodes/extension.rs | 54 ++++++++++++++++++++
 crates/trie/src/nodes/leaf.rs      | 80 ++++++++++++++++++++++++++++++
 crates/trie/src/nodes/mod.rs       | 29 +++++++++++
 7 files changed, 254 insertions(+)
 create mode 100644 crates/trie/src/nodes/branch.rs
 create mode 100644 crates/trie/src/nodes/extension.rs
 create mode 100644 crates/trie/src/nodes/leaf.rs
 create mode 100644 crates/trie/src/nodes/mod.rs

diff --git a/Cargo.lock b/Cargo.lock
index 354c4acdbc..27c918a742 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5310,6 +5310,11 @@ dependencies = [
 [[package]]
 name = "reth-trie"
 version = "0.1.0"
+dependencies = [
+ "hex",
+ "reth-primitives",
+ "reth-rlp",
+]
 
 [[package]]
 name = "revm"
diff --git a/crates/trie/Cargo.toml b/crates/trie/Cargo.toml
index a4284c7a0c..5d09bd5374 100644
--- a/crates/trie/Cargo.toml
+++ b/crates/trie/Cargo.toml
@@ -10,3 +10,13 @@ Merkle trie implementation
 """
 
 [dependencies]
+# reth
+reth-primitives = { path = "../primitives" }
+reth-rlp = { path = "../rlp" }
+
+# misc
+hex = "0.4"
+
+[dev-dependencies]
+# reth
+reth-primitives = { path = "../primitives", features = ["test-utils"] }
\ No newline at end of file
diff --git a/crates/trie/src/lib.rs b/crates/trie/src/lib.rs
index 2411c6dc79..68aaddfaa7 100644
--- a/crates/trie/src/lib.rs
+++ b/crates/trie/src/lib.rs
@@ -8,3 +8,6 @@
 //! The implementation of Merkle Patricia Trie, a cryptographically
 //! authenticated radix trie that is used to store key-value bindings.
 //!
+
+/// Various branch nodes produced by the hash builder.
+pub mod nodes;
diff --git a/crates/trie/src/nodes/branch.rs b/crates/trie/src/nodes/branch.rs
new file mode 100644
index 0000000000..c533c45924
--- /dev/null
+++ b/crates/trie/src/nodes/branch.rs
@@ -0,0 +1,73 @@
+use super::{matches_mask, rlp_node};
+use reth_primitives::{bytes::BytesMut, H256};
+use reth_rlp::{BufMut, EMPTY_STRING_CODE};
+
+/// A Branch node is only a pointer to the stack of nodes and is used to
+/// create the RLP encoding of the node using masks which filter from
+/// the stack of nodes.
+#[derive(Clone, Debug)]
+pub struct BranchNode<'a> {
+    /// Rlp encoded children
+    pub stack: &'a [Vec<u8>],
+}
+
+impl<'a> BranchNode<'a> {
+    /// Create a new branch node from the stack of nodes.
+    pub fn new(stack: &'a [Vec<u8>]) -> Self {
+        Self { stack }
+    }
+
+    /// Given the hash and state mask of children present, return an iterator over the stack items
+    /// that match the mask.
+    pub fn children(&self, state_mask: u16, hash_mask: u16) -> impl Iterator<Item = H256> + '_ {
+        let mut index = self.stack.len() - state_mask.count_ones() as usize;
+        (0..16).filter_map(move |digit| {
+            let mut child = None;
+            if matches_mask(state_mask, digit) {
+                if matches_mask(hash_mask, digit) {
+                    child = Some(&self.stack[index]);
+                }
+                index += 1;
+            }
+            child.map(|child| H256::from_slice(&child[1..]))
+        })
+    }
+
+    /// Returns the RLP encoding of the branch node given the state mask of children present.
+    pub fn rlp(&self, state_mask: u16) -> Vec<u8> {
+        let first_child_idx = self.stack.len() - state_mask.count_ones() as usize;
+        let mut buf = BytesMut::new();
+
+        // Create the RLP header from the mask elements present.
+        let mut i = first_child_idx;
+        let header = (0..16).fold(
+            reth_rlp::Header { list: true, payload_length: 1 },
+            |mut header, digit| {
+                if matches_mask(state_mask, digit) {
+                    header.payload_length += self.stack[i].len();
+                    i += 1;
+                } else {
+                    header.payload_length += 1;
+                }
+                header
+            },
+        );
+        header.encode(&mut buf);
+
+        // Extend the RLP buffer with the present children
+        let mut i = first_child_idx;
+        (0..16).for_each(|idx| {
+            if matches_mask(state_mask, idx) {
+                buf.extend_from_slice(&self.stack[i]);
+                i += 1;
+            } else {
+                buf.put_u8(EMPTY_STRING_CODE)
+            }
+        });
+
+        // Empty 17th list item, matching the header's initial `payload_length: 1`.
+        buf.put_u8(EMPTY_STRING_CODE);
+
+        rlp_node(&buf)
+    }
+}
diff --git a/crates/trie/src/nodes/extension.rs b/crates/trie/src/nodes/extension.rs
new file mode 100644
index 0000000000..024dc48ed6
--- /dev/null
+++ b/crates/trie/src/nodes/extension.rs
@@ -0,0 +1,54 @@
+use super::rlp_node;
+use reth_primitives::{bytes::BytesMut, trie::Nibbles};
+use reth_rlp::{BufMut, Encodable};
+
+/// An intermediate node that exists solely to compress the trie's paths. It contains a path segment
+/// (a shared prefix of keys) and a single child pointer. Essentially, an extension node can be
+/// thought of as a shortcut within the trie to reduce its overall depth.
+///
+/// The purpose of an extension node is to optimize the trie structure by collapsing multiple nodes
+/// with a single child into one node. This simplification reduces the space and computational
+/// complexity when performing operations on the trie.
+pub struct ExtensionNode<'a> {
+    /// A common prefix for keys.
+    pub prefix: Vec<u8>,
+    /// A pointer to the child.
+    pub node: &'a [u8],
+}
+
+impl<'a> ExtensionNode<'a> {
+    /// Creates a new extension node with the given prefix and child.
+    pub fn new(prefix: &Nibbles, node: &'a [u8]) -> Self {
+        Self { prefix: prefix.encode_path_leaf(false), node }
+    }
+
+    /// RLP encodes the node and returns either RLP(Node) or RLP(keccak(RLP(node))).
+    pub fn rlp(&self) -> Vec<u8> {
+        let mut buf = BytesMut::new();
+        self.encode(&mut buf);
+        rlp_node(&buf)
+    }
+}
+
+impl Encodable for ExtensionNode<'_> {
+    fn encode(&self, out: &mut dyn BufMut) {
+        let h = reth_rlp::Header {
+            list: true,
+            payload_length: self.prefix.as_slice().length() + self.node.len(),
+        };
+        h.encode(out);
+        // Slices have different RLP encoding from Vectors so we need to call `as_slice()`
+        self.prefix.as_slice().encode(out);
+        // The nodes are already RLP encoded
+        out.put_slice(self.node);
+    }
+}
+
+impl std::fmt::Debug for ExtensionNode<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ExtensionNode")
+            .field("prefix", &hex::encode(&self.prefix))
+            .field("node", &hex::encode(self.node))
+            .finish()
+    }
+}
diff --git a/crates/trie/src/nodes/leaf.rs b/crates/trie/src/nodes/leaf.rs
new file mode 100644
index 0000000000..7bfb174047
--- /dev/null
+++ b/crates/trie/src/nodes/leaf.rs
@@ -0,0 +1,80 @@
+use super::rlp_node;
+use reth_primitives::{bytes::BytesMut, trie::Nibbles};
+use reth_rlp::{BufMut, Encodable};
+
+/// A leaf node represents the endpoint or terminal node in the trie. In other words, a leaf node is
+/// where actual values are stored.
+///
+/// A leaf node consists of two parts: the key (or path) and the value. The key is typically the
+/// remaining portion of the key after following the path through the trie, and the value is the
+/// data associated with the full key. When searching the trie for a specific key, reaching a leaf
+/// node means that the search has successfully found the value associated with that key.
+#[derive(Default)]
+pub struct LeafNode<'a> {
+    /// The key path.
+    pub key: Vec<u8>,
+    /// value: SmallVec<[u8; 36]>
+    pub value: &'a [u8],
+}
+
+impl<'a> LeafNode<'a> {
+    /// Creates a new leaf node with the given key and value.
+    pub fn new(key: &Nibbles, value: &'a [u8]) -> Self {
+        Self { key: key.encode_path_leaf(true), value }
+    }
+
+    /// RLP encodes the node and returns either RLP(Node) or RLP(keccak(RLP(node)))
+    /// depending on whether the serialized node is shorter than a keccak hash (32 bytes).
+    pub fn rlp(&self) -> Vec<u8> {
+        let mut out = BytesMut::new();
+        self.encode(&mut out);
+        rlp_node(&out)
+    }
+}
+
+// Handroll because `key` must be encoded as a slice
+impl Encodable for LeafNode<'_> {
+    fn encode(&self, out: &mut dyn BufMut) {
+        #[derive(reth_rlp::RlpEncodable)]
+        struct S<'a> {
+            encoded_path: &'a [u8],
+            value: &'a [u8],
+        }
+        S { encoded_path: &self.key, value: self.value }.encode(out);
+    }
+}
+
+impl std::fmt::Debug for LeafNode<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LeafNode")
+            .field("key", &hex::encode(&self.key))
+            .field("value", &hex::encode(self.value))
+            .finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use reth_primitives::hex_literal::hex;
+
+    // From manual regression test
+    #[test]
+    fn encode_leaf_node_nibble() {
+        let nibble = Nibbles { hex_data: hex!("0604060f").to_vec() };
+        let encoded = nibble.encode_path_leaf(true);
+        let expected = hex!("20646f").to_vec();
+        assert_eq!(encoded, expected);
+    }
+
+    #[test]
+    fn rlp_leaf_node_roundtrip() {
+        let nibble = Nibbles { hex_data: hex!("0604060f").to_vec() };
+        let val = hex!("76657262").to_vec();
+        let leaf = LeafNode::new(&nibble, &val);
+        let rlp = leaf.rlp();
+
+        let expected = hex!("c98320646f8476657262").to_vec();
+        assert_eq!(rlp, expected);
+    }
+}
diff --git a/crates/trie/src/nodes/mod.rs b/crates/trie/src/nodes/mod.rs
new file mode 100644
index 0000000000..b7e3efc29d
--- /dev/null
+++ b/crates/trie/src/nodes/mod.rs
@@ -0,0 +1,29 @@
+use reth_primitives::{keccak256, H256};
+use reth_rlp::EMPTY_STRING_CODE;
+
+mod branch;
+pub use branch::BranchNode;
+
+mod extension;
+pub use extension::ExtensionNode;
+
+mod leaf;
+pub use leaf::LeafNode;
+
+/// Returns the given RLP encoded node if shorter than an `H256`, else RLP(keccak(RLP(node)))
+fn rlp_node(rlp: &[u8]) -> Vec<u8> {
+    if rlp.len() < H256::len_bytes() {
+        rlp.to_vec()
+    } else {
+        rlp_hash(keccak256(rlp))
+    }
+}
+
+/// Optimization for quick encoding of a hash as RLP
+pub fn rlp_hash(hash: H256) -> Vec<u8> {
+    [[EMPTY_STRING_CODE + H256::len_bytes() as u8].as_slice(), hash.0.as_slice()].concat()
+}
+
+fn matches_mask(mask: u16, idx: i32) -> bool {
+    mask & (1u16 << idx) != 0
+}