From 790238e120c6f12a03b49346b5beb8930744a766 Mon Sep 17 00:00:00 2001 From: parazyd Date: Thu, 1 May 2025 15:36:10 +0200 Subject: [PATCH] sdk: Implement barebones version of monotree (smt) This is a Sparse Merkle Tree used for contract state proofs. We only support in-memory storage and BLAKE3 as the hasher. Implementation is taken and stripped down from the repository located at and is licensed under the MIT License. --- Cargo.lock | 54 +++- src/sdk/Cargo.toml | 4 +- src/sdk/src/lib.rs | 3 + src/sdk/src/smt_native/bits.rs | 125 ++++++++++ src/sdk/src/smt_native/mod.rs | 46 ++++ src/sdk/src/smt_native/node.rs | 152 ++++++++++++ src/sdk/src/smt_native/tests.rs | 207 +++++++++++++++ src/sdk/src/smt_native/tree.rs | 428 ++++++++++++++++++++++++++++++++ src/sdk/src/smt_native/utils.rs | 179 +++++++++++++ 9 files changed, 1192 insertions(+), 6 deletions(-) create mode 100644 src/sdk/src/smt_native/bits.rs create mode 100644 src/sdk/src/smt_native/mod.rs create mode 100644 src/sdk/src/smt_native/node.rs create mode 100644 src/sdk/src/smt_native/tests.rs create mode 100644 src/sdk/src/smt_native/tree.rs create mode 100644 src/sdk/src/smt_native/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 76190f5dc..68b18682c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "alsa" version = "0.9.1" @@ -2170,7 +2176,9 @@ dependencies = [ "darkfi-serial", "halo2_gadgets", "halo2_proofs", + "hashbrown 0.15.3", "lazy_static", + "num", "num-bigint", "num-traits", "pasta_curves", @@ -3886,10 +3894,12 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", "serde", ] @@ -4321,7 +4331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "serde", ] @@ -5063,6 +5073,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -5090,6 +5114,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -5127,6 +5160,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -6430,7 +6474,7 @@ checksum = "1e147371c75553e1e2fcdb483944a8540b8438c31426279553b9a8182a9b7b65" dependencies = [ "bytecheck 0.8.1", "bytes 1.10.1", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "indexmap 2.9.0", "munge", "ptr_meta 0.3.0", @@ -9682,7 +9726,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4abf1132c1fdf747d56bbc1bb52152400c70f336870f968b85e89ea422198ae3" dependencies = [ "bitflags 2.9.0", - "hashbrown 0.15.2", + "hashbrown 0.15.3", "indexmap 2.9.0", "semver 1.0.26", "serde", diff --git a/src/sdk/Cargo.toml b/src/sdk/Cargo.toml index e9a2f9143..ce81aa4f8 100644 --- a/src/sdk/Cargo.toml +++ b/src/sdk/Cargo.toml @@ -25,6 +25,7 @@ darkfi-serial = {version = "0.4.2", features = ["crypto"]} # Encoding bs58 = "0.5.1" +num = "0.4.3" sha2 = "0.10.8" # Cryptography @@ -37,15 +38,16 @@ num-bigint = "0.4.6" num-traits = "0.2.19" pasta_curves = "0.5.1" rand_core = "0.6.4" +rand = "0.8.5" # Misc lazy_static = "1.5.0" subtle = "2.6.1" +hashbrown = "0.15.3" [dev-dependencies] halo2_proofs = {version = "0.3.0", features = ["dev-graph", "sanity-checks"]} halo2_gadgets = {version = "0.3.1", features = ["test-dependencies"]} -rand = "0.8.5" [lints] workspace = true diff --git a/src/sdk/src/lib.rs b/src/sdk/src/lib.rs index a3081028a..a2ddf8b6d 100644 --- a/src/sdk/src/lib.rs +++ b/src/sdk/src/lib.rs @@ -27,6 +27,9 @@ pub mod blockchain; /// DarkTree structures pub mod dark_tree; +/// Native (non-wasm, non-ff) Sparse Merkle Tree +pub mod smt_native; + /// Contract deployment utilities pub mod deploy; diff --git a/src/sdk/src/smt_native/bits.rs b/src/sdk/src/smt_native/bits.rs new file mode 100644 index 000000000..7fe9f89d4 --- /dev/null +++ b/src/sdk/src/smt_native/bits.rs @@ -0,0 +1,125 @@ +/* This file is part of DarkFi (https://dark.fi) + * + * Copyright (C) 2020-2025 Dyne.org foundation + * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use std::{cmp::Ordering, ops::Range};
+
+use super::{
+    utils::{bit, bytes_to_int, len_lcp, offsets},
+    BitsLen,
+};
+use crate::GenericResult;
+
+/// `BitVec` implementation based on bytes slice.
+///
+/// A `Bits` is a borrowed view: `path` holds the backing bytes and `range`
+/// selects which bit positions of `path` belong to this value.
+#[derive(Debug, Clone)]
+pub struct Bits<'a> {
+    /// Backing bytes the bit range refers to.
+    pub path: &'a [u8],
+    /// Bit positions within `path` covered by this value (`start..end`).
+    pub range: Range<BitsLen>,
+}
+
+impl<'a> Bits<'a> {
+    /// Create a `Bits` covering every bit of `bytes`.
+    ///
+    /// The bit count is computed in `BitsLen` (`u16`), so the result is only
+    /// meaningful for slices shorter than 8192 bytes.
+    pub fn new(bytes: &'a [u8]) -> Self {
+        Self { path: bytes, range: 0..(bytes.len() as BitsLen * 8) }
+    }
+
+    /// Construct `Bits` instance by deserializing bytes slice.
+    ///
+    /// Expects `range.start` and `range.end` (each `size_of::<BitsLen>()` bytes,
+    /// decoded with `bytes_to_int`) followed by the path bytes, i.e. the layout
+    /// written by [`Bits::to_bytes`].
+    ///
+    /// # Panics
+    /// Panics if `bytes` is shorter than the two range fields.
+    pub fn from_bytes(bytes: &'a [u8]) -> Self {
+        let u = std::mem::size_of::<BitsLen>();
+        let start: BitsLen = bytes_to_int(&bytes[..u]);
+        let end: BitsLen = bytes_to_int(&bytes[u..2 * u]);
+        Self { path: &bytes[2 * u..], range: start..end }
+    }
+
+    /// Serialize `Bits` into bytes: big-endian `range.start`, big-endian
+    /// `range.end`, then the path bytes.
+    pub fn to_bytes(&self) -> GenericResult<Vec<u8>> {
+        Ok([&self.range.start.to_be_bytes(), &self.range.end.to_be_bytes(), self.path].concat())
+    }
+
+    /// Get the very first bit, i.e. the bit of `path` at `range.start`.
+    pub fn first(&self) -> bool {
+        bit(self.path, self.range.start)
+    }
+
+    /// Number of bits covered by `range`. Assumes `range.end >= range.start`.
+    pub fn len(&self) -> BitsLen {
+        self.range.end - self.range.start
+    }
+
+    /// `true` when the range covers no bits or there are no backing bytes.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0 || self.path.is_empty()
+    }
+
+    /// Get the resulting `Bits` when shifted with the given size.
+    ///
+    /// `offsets()` yields a byte offset `q` and the new bit range. With
+    /// `tail == true` the bytes before `q` are kept, otherwise the bytes from
+    /// `q` onwards are kept.
+    pub fn shift(&self, n: BitsLen, tail: bool) -> Self {
+        let (q, range) = offsets(&self.range, n, tail);
+        if tail {
+            Self { path: &self.path[..q as usize], range }
+        } else {
+            Self { path: &self.path[q as usize..], range }
+        }
+    }
+
+    /// Get length of the longest common prefix bits for the given two `Bits`.
+    pub fn len_common_bits(a: &Self, b: &Self) -> BitsLen {
+        len_lcp(a.path, &a.range, b.path, &b.range)
+    }
+
+    /// Get the bit at position `i` within this Bits range
+    ///
+    /// # Panics
+    /// Panics if `i` is not smaller than [`Bits::len`].
+    pub fn bit(&self, i: BitsLen) -> bool {
+        assert!(i < self.len(), "Bit index out of range");
+        bit(self.path, self.range.start + i)
+    }
+
+    /// Compare bits lexicographically (MSB to LSB)
+    ///
+    /// When one value is a prefix of the other, the shorter one orders first.
+    pub fn lexical_cmp(&self, other: &Self) -> Ordering {
+        let min_len = std::cmp::min(self.len(), other.len());
+
+        // Compare bit by bit from start of range
+        for i in 0..min_len {
+            match (self.bit(i), other.bit(i)) {
+                (false, true) => return Ordering::Less,
+                (true, false) => return Ordering::Greater,
+                _ => continue,
+            }
+        }
+
+        // All compared bits equal, compare lengths
+        self.len().cmp(&other.len())
+    }
+}
+
+// Implement equality/ordering based on actual bit values
+impl PartialEq for Bits<'_> {
+    fn eq(&self, other: &Self) -> bool {
+        self.len() == other.len() && (0..self.len()).all(|i| self.bit(i) == other.bit(i))
+    }
+}
+
+impl Eq for Bits<'_> {}
+
+// Canonical form: defer to `Ord` so both orderings can never disagree.
+impl PartialOrd for Bits<'_> {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for Bits<'_> {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.lexical_cmp(other)
+    }
+}
diff --git a/src/sdk/src/smt_native/mod.rs b/src/sdk/src/smt_native/mod.rs
new file mode 100644
index 000000000..dae13f51d
--- /dev/null
+++ b/src/sdk/src/smt_native/mod.rs
@@ -0,0 +1,46 @@
+/* This file is part of DarkFi (https://dark.fi)
+ *
+ * Copyright (C) 2020-2025 Dyne.org foundation
+ * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/// Size of fixed length byte-array from a `Hasher`.
+/// Equivalent to `key` length of the tree.
+pub const HASH_LEN: usize = 32;
+
+/// A type representing length of `Bits`
+pub type BitsLen = u16;
+
+/// Type indicating fixed length byte-array.
+pub type Hash = [u8; HASH_LEN];
+
+/// Type representing a Merkle proof
+///
+/// One `(right, bytes)` entry per node on the path from the root to the leaf:
+/// a direction flag and the serialized node with the hash on the path's side
+/// cut out.
+pub type Proof = Vec<(bool, Vec<u8>)>;
+
+/// The key to be used to restore the latest `root`
+pub const ROOT_KEY: &Hash = b"_______monotree::headroot_______";
+
+/// Bit-level view over a byte slice
+pub mod bits;
+
+/// Tree nodes and their byte serialization
+pub mod node;
+
+/// The tree itself and its in-memory storage
+pub mod tree;
+pub use tree::Monotree;
+
+/// Helper functions shared by the modules above
+pub mod utils;
+
+#[cfg(test)]
+mod tests;
diff --git a/src/sdk/src/smt_native/node.rs b/src/sdk/src/smt_native/node.rs
new file mode 100644
index 000000000..90c60ecfc
--- /dev/null
+++ b/src/sdk/src/smt_native/node.rs
@@ -0,0 +1,152 @@
+/* This file is part of DarkFi (https://dark.fi)
+ *
+ * Copyright (C) 2020-2025 Dyne.org foundation
+ * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use super::{
+    bits::Bits,
+    utils::{bytes_to_int, nbytes_across},
+    BitsLen, HASH_LEN,
+};
+use crate::GenericResult;
+
+/// A type for describing components of `Node`: a real element of `Unit`
+/// or a virtual element `None`.
+pub type Cell<'a> = Option<Unit<'a>>;
+
+/// A component of `Node` consisting of `Hash` and `Bits`, which represents
+/// a joint of subtrees it has.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Unit<'a> {
+    /// Hash of the child node, or the leaf value at the end of a path.
+    pub hash: &'a [u8],
+    /// Path bits leading from this node to `hash`.
+    pub bits: Bits<'a>,
+}
+
+/// The only component of `monotree`. In a big picture, `monotree` simply
+/// consists of structured `Node`s.
+///
+/// # Schematic
+/// There are two types of `Node` -- ___Soft node___ and ___Hard node___.
+/// * ___Hard___: A node that has two real cells as components. Two links to child nodes.
+/// * ___Soft___: A node that has only one real cell and it has only one link going out to child node.
+/// ```text
+///             Root
+///            /    \
+///        NodeA    NodeB
+///        /   \        \
+///    NodeC   LeafB    LeafC
+///     /
+///  LeafA
+/// ```
+/// Where NodeA is a _Hard node_, NodeB and NodeC are _Soft nodes_.
+///
+/// # Byte-Serialized View
+/// Numbers in parentheses refer to byte length.
+/// By default `HASH_LEN = 32` and a `BitsLen` takes 2 bytes.
+///
+/// _SoftNode_ = `Cell` + `0x00`(1), where
+/// `Cell` = `hash`(`HASH_LEN`) + `range_start`(`BitsLen`) + `range_end`(`BitsLen`) + `path`(up to `HASH_LEN`).
+/// `0x00` is an indicator for soft node.
+///
+/// _HardNode_ = `Cell_L` + `Cell_R` + `0x01`(1), where
+/// `Cell_L` = `hash_L`(`HASH_LEN`) + `range_L_start`(`BitsLen`) + `range_L_end`(`BitsLen`) + `path_L`(up to `HASH_LEN`),
+/// `Cell_R` = `range_R_start`(`BitsLen`) + `range_R_end`(`BitsLen`) + `path_R`(up to `HASH_LEN`) + `hash_R`(`HASH_LEN`).
+/// `0x01` is an indicator for hard node.
+///
+/// To make ***Merkle proof*** easier, we purposely placed the _hashes_ on outskirts of the serialized form.
+/// With only 1-bit information of left or right, provers can easily guess
+/// which side of the hash they hold should be appended for the next step.
+/// Refer to the `verify_proof()` implementation for more on this.
+pub enum Node<'a> {
+    Soft(Cell<'a>),
+    Hard(Cell<'a>, Cell<'a>),
+}
+
+impl<'a> Node<'a> {
+    /// Build a node from up to two cells: one real cell gives a soft node,
+    /// two real cells give a hard node.
+    ///
+    /// # Panics
+    /// Panics if both cells are `None`.
+    pub fn new(lc: Cell<'a>, rc: Cell<'a>) -> Self {
+        match (&lc, &rc) {
+            (&Some(_), &None) => Node::Soft(lc),
+            (&None, &Some(_)) => Node::Soft(rc),
+            (&Some(_), &Some(_)) => Node::Hard(lc, rc),
+            _ => unreachable!("Node::new()"),
+        }
+    }
+
+    /// Construct `Cell`s by deserializing bytes slice.
+    ///
+    /// Returns `(selected, other)`. A soft node yields its only cell and `None`.
+    /// A hard node yields the right cell first when `right` is set, otherwise
+    /// the left cell first.
+    pub fn cells_from_bytes(bytes: &'a [u8], right: bool) -> GenericResult<(Cell<'a>, Cell<'a>)> {
+        match Node::from_bytes(bytes)? {
+            Node::Soft(cell) => Ok((cell, None)),
+            Node::Hard(lc, rc) => {
+                if right {
+                    Ok((rc, lc))
+                } else {
+                    Ok((lc, rc))
+                }
+            }
+        }
+    }
+
+    /// Parse one cell out of `bytes`.
+    ///
+    /// With `right == false` the hash is the first `HASH_LEN` bytes and the
+    /// range and path follow it. With `right == true` the range and path come
+    /// first and the hash is the last `HASH_LEN` bytes of `bytes`.
+    /// Also returns the offset just past the parsed path.
+    fn parse_bytes(bytes: &'a [u8], right: bool) -> GenericResult<(Cell<'a>, usize)> {
+        let len_bytes = bytes.len();
+        let len_bits = std::mem::size_of::<BitsLen>();
+        let offset_hash = if right { 0_usize } else { HASH_LEN };
+        let range_hash = if right { len_bytes - HASH_LEN..len_bytes } else { 0..HASH_LEN };
+        let start: BitsLen = bytes_to_int(&bytes[offset_hash..offset_hash + len_bits]);
+        let end: BitsLen = bytes_to_int(&bytes[offset_hash + len_bits..offset_hash + 2 * len_bits]);
+        // Number of path bytes needed to hold the bit range `start..end`.
+        let offset_bits = nbytes_across(start, end) as usize;
+
+        Ok((
+            Some(Unit {
+                hash: &bytes[range_hash],
+                bits: Bits {
+                    path: &bytes
+                        [offset_hash + 2 * len_bits..offset_hash + 2 * len_bits + offset_bits],
+                    range: start..end,
+                },
+            }),
+            offset_hash + 2 * len_bits + offset_bits,
+        ))
+    }
+
+    /// Construct `Node` by deserializing bytes slice.
+    ///
+    /// The last byte selects the node type: `0x00` soft, `0x01` hard.
+    ///
+    /// # Panics
+    /// Panics if `bytes` is empty, ends in any other tag byte, or is too short
+    /// for the cells it should contain. Only feed it bytes produced by
+    /// [`Node::to_bytes`].
+    pub fn from_bytes(bytes: &'a [u8]) -> GenericResult<Self> {
+        match bytes.last() {
+            Some(&0x00) => {
+                let (cell, _) = Node::parse_bytes(&bytes[..bytes.len() - 1], false)?;
+                Ok(Node::Soft(cell))
+            }
+            Some(&0x01) => {
+                // The left cell starts at the front; `size` is where it ends and
+                // the right cell (minus the trailing tag byte) begins.
+                let (lc, size) = Node::parse_bytes(bytes, false)?;
+                let (rc, _) = Node::parse_bytes(&bytes[size..bytes.len() - 1], true)?;
+                Ok(Node::Hard(lc, rc))
+            }
+            _ => unreachable!("Node::from_bytes()"),
+        }
+    }
+
+    /// Serialize `Node` into bytes.
+    ///
+    /// For a hard node the cell whose path starts with a `1` bit is always
+    /// written as the right cell, whatever order the cells are stored in.
+    ///
+    /// # Panics
+    /// Panics if the node holds a `None` cell.
+    pub fn to_bytes(&self) -> GenericResult<Vec<u8>> {
+        match self {
+            Node::Soft(Some(unit)) => Ok([unit.hash, &unit.bits.to_bytes()?, &[0x00]].concat()),
+            Node::Hard(Some(lu), Some(ru)) => {
+                let (lu, ru) = if ru.bits.first() { (lu, ru) } else { (ru, lu) };
+                Ok([lu.hash, &lu.bits.to_bytes()?, &ru.bits.to_bytes()?, ru.hash, &[0x01]].concat())
+            }
+            _ => unreachable!("node.to_bytes()"),
+        }
+    }
+}
diff --git a/src/sdk/src/smt_native/tests.rs b/src/sdk/src/smt_native/tests.rs
new file mode 100644
index 000000000..d186ffdb1
--- /dev/null
+++ b/src/sdk/src/smt_native/tests.rs
@@ -0,0 +1,207 @@
+/* This file is part of DarkFi (https://dark.fi)
+ *
+ * Copyright (C) 2020-2025 Dyne.org foundation
+ * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use super::{
+    tree::verify_proof,
+    utils::{random_hashes, shuffle},
+    Hash, Monotree,
+};
+
+/// Every key inserted so far must read back its value after each insertion.
+#[test]
+fn monotree_test_insert_then_verify_values() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root = None;
+    let mut tree = Monotree::new();
+
+    for (i, (key, value)) in keys.iter().zip(values.iter()).enumerate() {
+        root = tree.insert(root.as_ref(), key, value).unwrap();
+        tree.set_headroot(root.as_ref());
+
+        for (k, v) in keys.iter().zip(values.iter()).take(i + 1) {
+            assert_eq!(tree.get(root.as_ref(), k).unwrap(), Some(*v));
+        }
+    }
+
+    assert_ne!(root, None);
+}
+
+/// Every key inserted so far must yield a proof that verifies against the root.
+#[test]
+fn monotree_test_insert_keys_then_gen_and_verify_proof() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root = None;
+    let mut tree = Monotree::new();
+
+    for (i, (key, value)) in keys.iter().zip(values.iter()).enumerate() {
+        root = tree.insert(root.as_ref(), key, value).unwrap();
+        tree.set_headroot(root.as_ref());
+
+        for (k, v) in keys.iter().zip(values.iter()).take(i + 1) {
+            let proof = tree.get_merkle_proof(root.as_ref(), k).unwrap();
+            assert!(verify_proof(root.as_ref(), v, proof.as_ref()));
+        }
+    }
+
+    assert_ne!(root, None);
+}
+
+#[test]
+fn monotree_test_insert_keys_then_delete_keys_in_order() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root = None;
+    let mut tree = Monotree::new();
+
+    // pre-insertion for removal test
+    root = tree.inserts(root.as_ref(), &keys, &values).unwrap();
+    tree.set_headroot(root.as_ref());
+
+    // Removal test with keys in order
+    for (i, (key, _)) in keys.iter().zip(values.iter()).enumerate() {
+        assert_ne!(root, None);
+        // Assert that all other values are fine after deletion
+        for (k, v) in keys.iter().zip(values.iter()).skip(i) {
+            assert_eq!(tree.get(root.as_ref(), k).unwrap(), Some(*v));
+            let proof = tree.get_merkle_proof(root.as_ref(), k).unwrap();
+            assert!(verify_proof(root.as_ref(), v, proof.as_ref()));
+        }
+
+        // Delete a key and check if it worked
+        root = tree.remove(root.as_ref(), key).unwrap();
+        tree.set_headroot(root.as_ref());
+        assert_eq!(tree.get(root.as_ref(), key).unwrap(), None);
+    }
+
+    // Back to initial state of tree
+    assert_eq!(root, None);
+}
+
+#[test]
+fn monotree_test_insert_then_delete_keys_reverse() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root = None;
+    let mut tree = Monotree::new();
+
+    // pre-insertion for removal test
+    root = tree.inserts(root.as_ref(), &keys, &values).unwrap();
+    tree.set_headroot(root.as_ref());
+
+    // Removal test with keys in reverse order
+    for (i, (key, _)) in keys.iter().zip(values.iter()).rev().enumerate() {
+        assert_ne!(root, None);
+        // Assert that all other values are fine after deletion
+        for (k, v) in keys.iter().zip(values.iter()).rev().skip(i) {
+            assert_eq!(tree.get(root.as_ref(), k).unwrap(), Some(*v));
+            let proof = tree.get_merkle_proof(root.as_ref(), k).unwrap();
+            assert!(verify_proof(root.as_ref(), v, proof.as_ref()));
+        }
+
+        // Delete a key and check if it worked
+        root = tree.remove(root.as_ref(), key).unwrap();
+        tree.set_headroot(root.as_ref());
+        assert_eq!(tree.get(root.as_ref(), key).unwrap(), None);
+    }
+
+    // Back to initial state of tree
+    assert_eq!(root, None);
+}
+
+#[test]
+fn monotree_test_insert_then_delete_keys_random() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root = None;
+    let mut tree = Monotree::new();
+
+    // pre-insertion for removal test
+    root = tree.inserts(root.as_ref(), &keys, &values).unwrap();
+    tree.set_headroot(root.as_ref());
+
+    // Shuffles keys/leaves' index for imitating random access
+    let mut idx: Vec<usize> = (0..keys.len()).collect();
+    shuffle(&mut idx);
+
+    // Test with shuffled keys
+    for (n, i) in idx.iter().enumerate() {
+        assert_ne!(root, None);
+
+        // Assert that all values are fine after deletion
+        for j in idx.iter().skip(n) {
+            assert_eq!(tree.get(root.as_ref(), &keys[*j]).unwrap(), Some(values[*j]));
+            let proof = tree.get_merkle_proof(root.as_ref(), &keys[*j]).unwrap();
+            assert!(verify_proof(root.as_ref(), &values[*j], proof.as_ref()));
+        }
+
+        // Delete a key by random index and check if it worked.
+        // The lookup must use the removed *key*; looking up the value hash
+        // would pass even if the removal did nothing.
+        root = tree.remove(root.as_ref(), &keys[*i]).unwrap();
+        tree.set_headroot(root.as_ref());
+        assert_eq!(tree.get(root.as_ref(), &keys[*i]).unwrap(), None);
+    }
+
+    // Back to initial state of tree
+    assert_eq!(root, None);
+}
+
+#[test]
+fn monotree_test_deterministic_ordering() {
+    let keys = random_hashes(100);
+    let values = random_hashes(100);
+
+    let mut root1 = None;
+    let mut tree1 = Monotree::new();
+
+    let mut root2 = None;
+    let mut tree2 = Monotree::new();
+
+    // Insert in normal order
+    root1 = tree1.inserts(root1.as_ref(), &keys, &values).unwrap();
+    tree1.set_headroot(root1.as_ref());
+    assert_ne!(root1, None);
+
+    // Insert in reverse order
+    let rev_keys: Vec<Hash> = keys.iter().rev().cloned().collect();
+    let rev_vals: Vec<Hash> = values.iter().rev().cloned().collect();
+    root2 = tree2.inserts(root2.as_ref(), &rev_keys, &rev_vals).unwrap();
+    tree2.set_headroot(root2.as_ref());
+    assert_ne!(root2, None);
+
+    // Verify roots match
+    assert_eq!(root1, root2);
+
+    // Verify removal consistency
+    for key in keys {
+        root1 = tree1.remove(root1.as_ref(), &key).unwrap();
+        tree1.set_headroot(root1.as_ref());
+
+        root2 = tree2.remove(root2.as_ref(), &key).unwrap();
+        tree2.set_headroot(root2.as_ref());
+
+        assert_eq!(root1, root2);
+    }
+
+    assert_eq!(root1, None);
+    assert_eq!(root2, None);
+}
diff --git a/src/sdk/src/smt_native/tree.rs b/src/sdk/src/smt_native/tree.rs
new file mode 100644
index 000000000..980c7bab2
--- /dev/null
+++ b/src/sdk/src/smt_native/tree.rs
@@ -0,0 +1,428 @@
+/* This file is part of DarkFi (https://dark.fi)
+ *
+ * Copyright (C) 2020-2025 Dyne.org foundation
+ * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it
under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+use hashbrown::{HashMap, HashSet};
+
+use super::{
+    bits::Bits,
+    node::{Node, Unit},
+    utils::{get_sorted_indices, slice_to_hash},
+    Hash, Proof, HASH_LEN, ROOT_KEY,
+};
+use crate::GenericResult;
+
+/// Staging area for writes made while a batch is open.
+///
+/// `map` holds pending puts and `set` holds keys pending deletion. A key
+/// that is in `set` counts as absent even if `map` still has an entry for it.
+#[derive(Debug)]
+pub(crate) struct MemCache {
+    pub(crate) set: HashSet<Hash>,
+    pub(crate) map: HashMap<Hash, Vec<u8>>,
+}
+
+#[allow(dead_code)]
+impl MemCache {
+    pub(crate) fn new() -> Self {
+        Self { set: HashSet::new(), map: HashMap::with_capacity(1 << 12) }
+    }
+
+    /// Drop all pending puts and deletions.
+    pub(crate) fn clear(&mut self) {
+        self.set.clear();
+        self.map.clear();
+    }
+
+    /// `true` if `key` has a pending put that is not shadowed by a pending deletion.
+    pub(crate) fn contains(&self, key: &[u8]) -> bool {
+        !self.set.contains(key) && self.map.contains_key(key)
+    }
+
+    /// Pending value for `key`, ignoring pending deletions.
+    pub(crate) fn get(&mut self, key: &[u8]) -> Option<Vec<u8>> {
+        self.map.get(key).cloned()
+    }
+
+    /// Stage a put; this cancels any pending deletion of the same key.
+    pub(crate) fn put(&mut self, key: &[u8], value: Vec<u8>) {
+        self.map.insert(slice_to_hash(key), value);
+        self.set.remove(key);
+    }
+
+    /// Stage a deletion of `key`.
+    pub(crate) fn delete(&mut self, key: &[u8]) {
+        self.set.insert(slice_to_hash(key));
+    }
+}
+
+/// In-memory key-value store with an optional write batch.
+///
+/// While `batch_on` is set, writes go to `batch` and only reach `db` in
+/// `finish_batch()`.
+#[derive(Debug)]
+pub struct MemoryDb {
+    db: HashMap<Hash, Vec<u8>>,
+    batch: MemCache,
+    batch_on: bool,
+}
+
+#[allow(dead_code)]
+impl MemoryDb {
+    fn new() -> Self {
+        Self { db: HashMap::new(), batch: MemCache::new(), batch_on: false }
+    }
+
+    /// Look up `key`, preferring the state of the open batch over committed data.
+    fn get(&mut self, key: &[u8]) -> GenericResult<Option<Vec<u8>>> {
+        if self.batch_on {
+            // A key deleted inside the open batch must not fall through to its
+            // stale committed value.
+            if self.batch.set.contains(key) {
+                return Ok(None);
+            }
+            if let Some(value) = self.batch.get(key) {
+                return Ok(Some(value));
+            }
+        }
+
+        Ok(self.db.get(key).cloned())
+    }
+
+    /// Store `value` under `key`, staged in the batch when one is open.
+    fn put(&mut self, key: &[u8], value: Vec<u8>) -> GenericResult<()> {
+        if self.batch_on {
+            self.batch.put(key, value);
+        } else {
+            self.db.insert(slice_to_hash(key), value);
+        }
+        Ok(())
+    }
+
+    /// Remove `key`, staged in the batch when one is open.
+    fn delete(&mut self, key: &[u8]) -> GenericResult<()> {
+        if self.batch_on {
+            self.batch.delete(key);
+        } else {
+            self.db.remove(key);
+        }
+        Ok(())
+    }
+
+    /// Open a batch. Does nothing if one is already open.
+    fn init_batch(&mut self) -> GenericResult<()> {
+        if !self.batch_on {
+            self.batch.clear();
+            self.batch_on = true;
+        }
+        Ok(())
+    }
+
+    /// Apply the staged puts, then the staged deletions, and close the batch.
+    /// Does nothing if no batch is open.
+    fn finish_batch(&mut self) -> GenericResult<()> {
+        if self.batch_on {
+            for (key, value) in self.batch.map.drain() {
+                self.db.insert(key, value);
+            }
+            for key in self.batch.set.drain() {
+                self.db.remove(&key);
+            }
+            self.batch_on = false;
+        }
+        Ok(())
+    }
+}
+
+/// A structure for `monotree`
+#[derive(Debug)]
+pub struct Monotree {
+    db: MemoryDb,
+}
+
+impl Default for Monotree {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Monotree {
+    /// Create an empty tree backed by a fresh in-memory database.
+    pub fn new() -> Self {
+        Self { db: MemoryDb::new() }
+    }
+
+    /// BLAKE3 digest of `bytes`.
+    fn hash_digest(bytes: &[u8]) -> Hash {
+        let mut hasher = blake3::Hasher::new();
+        hasher.update(bytes);
+        let hash = hasher.finalize();
+        slice_to_hash(hash.as_bytes())
+    }
+
+    /// Retrieves the latest state (root) from the database.
+    pub fn get_headroot(&mut self) -> GenericResult<Option<Hash>> {
+        Ok(self.db.get(ROOT_KEY)?.map(|root| slice_to_hash(&root)))
+    }
+
+    /// Sets the latest state (root) to the database.
+    ///
+    /// NOTE(review): passing `None` leaves any previously stored head root in
+    /// place, so `get_headroot()` keeps returning it after the tree has been
+    /// emptied. Confirm whether that is intended.
+    pub fn set_headroot(&mut self, headroot: Option<&Hash>) {
+        if let Some(root) = headroot {
+            self.db.put(ROOT_KEY, root.to_vec()).expect("set_headroot(): hash");
+        }
+    }
+
+    /// Open a write batch on the database.
+    pub fn prepare(&mut self) {
+        self.db.init_batch().expect("prepare(): failed to initialize batch");
+    }
+
+    /// Flush the open write batch into the database.
+    pub fn commit(&mut self) {
+        self.db.finish_batch().expect("commit(): failed to finish batch");
+    }
+
+    /// Insert key-leaf entry into the tree. Returns a new root hash.
+    pub fn insert(
+        &mut self,
+        root: Option<&Hash>,
+        key: &Hash,
+        leaf: &Hash,
+    ) -> GenericResult<Option<Hash>> {
+        match root {
+            // Empty tree: the new root is a soft node holding the whole key.
+            None => {
+                let (hash, bits) = (leaf, Bits::new(key));
+                self.put_node(Node::new(Some(Unit { hash, bits }), None))
+            }
+            Some(root) => self.put(root, Bits::new(key), leaf),
+        }
+    }
+
+    /// Serialize `node`, store it under the hash of its bytes and return that hash.
+    fn put_node(&mut self, node: Node) -> GenericResult<Option<Hash>> {
+        let bytes = node.to_bytes()?;
+        let hash = Self::hash_digest(&bytes);
+        self.db.put(&hash, bytes)?;
+        Ok(Some(hash))
+    }
+
+    /// Recursively insert a bytes (in forms of Bits) and a leaf into the tree.
+    ///
+    /// Optimisation in `monotree` is mainly to compress the path as much as possible
+    /// while reducing the number of db accesses using the most intuitive model.
+    /// As a result, compared to the standard Sparse Merkle Tree this reduces the
+    /// number of DB accesses from `N` to `log2(N)` in both reads and writes.
+    ///
+    /// Every `put()` call triggers at least one `put_node()` call, each of which
+    /// costs a single hash digest plus a single DB write.
+    /// Compressing the path reduces the number of `put()` calls, which in turn reduces
+    /// the number of hash function calls as well as the number of DB writes.
+    ///
+    /// There are four modes when putting the entries and each of them is processed in a
+    /// recursive `put()` call.
+    /// The number in parenthesis refers to the minimum of DB access and hash fn calls required.
+    ///
+    /// * set-aside (1)
+    ///   Putting the leaf to the next node in the current depth.
+    /// * replacement (1)
+    ///   Replaces the existing node on the path with the new leaf.
+    /// * consume & pass-over (2+)
+    ///   Consuming the path on the way, then pass the rest of work to their child node.
+    /// * split-node (2)
+    ///   Immediately split node into two with the longest common prefix,
+    ///   then wind the recursive stack from there returning resulting hashes.
+    fn put(&mut self, root: &[u8], bits: Bits, leaf: &[u8]) -> GenericResult<Option<Hash>> {
+        let bytes = self.db.get(root)?.expect("bytes");
+        // `lc` is the cell on the side selected by the key's first bit (or the
+        // only cell of a soft node); `rc` is the other cell, if any.
+        let (lc, rc) = Node::cells_from_bytes(&bytes, bits.first())?;
+        let unit = lc.as_ref().expect("put(): left-unit");
+        let n = Bits::len_common_bits(&unit.bits, &bits);
+
+        match n {
+            // set-aside: nothing in common, the leaf becomes the second cell
+            0 => self.put_node(Node::new(lc, Some(Unit { hash: leaf, bits }))),
+            // replacement: the whole remaining key matches, overwrite the leaf
+            n if n == bits.len() => self.put_node(Node::new(Some(Unit { hash: leaf, bits }), rc)),
+            // consume & pass-over: the cell's path is a prefix of the key,
+            // recurse into the child with the rest of the key
+            n if n == unit.bits.len() => {
+                let hash = &self.put(unit.hash, bits.shift(n, false), leaf)?.expect("put(): hash");
+
+                let unit = unit.to_owned();
+                self.put_node(Node::new(Some(Unit { hash, ..unit }), rc))
+            }
+            // split-node: the paths diverge after `n` bits; both remainders go
+            // into a new child node and this cell keeps the common prefix
+            _ => {
+                let bits = bits.shift(n, false);
+                let ru = Unit { hash: leaf, bits };
+
+                let (cloned, unit) = (unit.bits.clone(), unit.to_owned());
+                let (hash, bits) = (unit.hash, unit.bits.shift(n, false));
+                let lu = Unit { hash, bits };
+
+                // ENFORCE DETERMINISTIC ORDERING
+                let (left, right) = if lu.bits < ru.bits { (lu, ru) } else { (ru, lu) };
+
+                let hash =
+                    &self.put_node(Node::new(Some(left), Some(right)))?.expect("put(): hash");
+                let bits = cloned.shift(n, true);
+                self.put_node(Node::new(Some(Unit { hash, bits }), rc))
+            }
+        }
+    }
+
+    /// Get a leaf hash for the given root and key.
+    pub fn get(&mut self, root: Option<&Hash>, key: &Hash) -> GenericResult<Option<Hash>> {
+        match root {
+            None => Ok(None),
+            Some(root) => self.find_key(root, Bits::new(key)),
+        }
+    }
+
+    /// Walk down from `root` along `bits`; returns the leaf if the key is
+    /// stored, `None` as soon as the stored path diverges from the key.
+    fn find_key(&mut self, root: &[u8], bits: Bits) -> GenericResult<Option<Hash>> {
+        let bytes = self.db.get(root)?.expect("bytes");
+        let (cell, _) = Node::cells_from_bytes(&bytes, bits.first())?;
+        let unit = cell.as_ref().expect("find_key(): left-unit");
+        let n = Bits::len_common_bits(&unit.bits, &bits);
+        match n {
+            n if n == bits.len() => Ok(Some(slice_to_hash(unit.hash))),
+            n if n == unit.bits.len() => self.find_key(unit.hash, bits.shift(n, false)),
+            _ => Ok(None),
+        }
+    }
+
+    /// Remove the given key and its corresponding leaf from the tree. Returns a new root hash.
+    ///
+    /// Returns `None` when the tree is empty afterwards. Removing a key that is
+    /// not stored leaves the root unchanged.
+    pub fn remove(&mut self, root: Option<&Hash>, key: &[u8]) -> GenericResult<Option<Hash>> {
+        match root {
+            None => Ok(None),
+            Some(root) => self.delete_key(root, Bits::new(key)),
+        }
+    }
+
+    /// Remove `bits` below `root` and return the hash of the rewritten node,
+    /// or `None` if nothing is left below it.
+    fn delete_key(&mut self, root: &[u8], bits: Bits) -> GenericResult<Option<Hash>> {
+        let bytes = self.db.get(root)?.expect("bytes");
+        let (lc, rc) = Node::cells_from_bytes(&bytes, bits.first())?;
+        let unit = lc.as_ref().expect("delete_key(): left-unit");
+        let n = Bits::len_common_bits(&unit.bits, &bits);
+
+        match n {
+            // The leaf hangs directly off this node: keep only the other cell.
+            n if n == bits.len() => match rc {
+                Some(_) => self.put_node(Node::new(None, rc)),
+                None => Ok(None),
+            },
+            n if n == unit.bits.len() => {
+                let hash = self.delete_key(unit.hash, bits.shift(n, false))?;
+                match (hash, &rc) {
+                    (None, None) => Ok(None),
+                    (None, Some(_)) => self.put_node(Node::new(None, rc)),
+                    (Some(ref hash), _) => {
+                        let unit = unit.to_owned();
+                        let lc = Some(Unit { hash, ..unit });
+                        self.put_node(Node::new(lc, rc))
+                    }
+                }
+            }
+            // The key is not stored under this node: the subtree is unchanged, so
+            // hand back its current hash. Returning `None` here would tell the
+            // caller that the whole subtree had been removed.
+            _ => Ok(Some(slice_to_hash(root))),
+        }
+    }
+
+    /// This method is intended to use the `insert()` method in batch mode.
+    /// Note that `inserts()` forces the batch to commit.
+ pub fn inserts( + &mut self, + root: Option<&Hash>, + keys: &[Hash], + leaves: &[Hash], + ) -> GenericResult> { + let indices = get_sorted_indices(keys, false); + self.prepare(); + + let mut root = root.cloned(); + for i in indices.iter() { + root = self.insert(root.as_ref(), &keys[*i], &leaves[*i])?; + } + + self.commit(); + Ok(root) + } + + /// This method is intended to use the `get()` method in batch mode. + pub fn gets(&mut self, root: Option<&Hash>, keys: &[Hash]) -> GenericResult>> { + let mut leaves: Vec> = vec![]; + for key in keys.iter() { + leaves.push(self.get(root, key)?); + } + Ok(leaves) + } + + /// This method is intended to use the `remove()` method in batch mode. + /// Note that `removes()` forces the batch to commit. + pub fn removes(&mut self, root: Option<&Hash>, keys: &[Hash]) -> GenericResult> { + let indices = get_sorted_indices(keys, false); + let mut root = root.cloned(); + self.prepare(); + + for i in indices.iter() { + root = self.remove(root.as_ref(), &keys[*i])?; + } + + self.commit(); + Ok(root) + } + + /// Generate a Merkle proof for the given root and key. 
+ pub fn get_merkle_proof( + &mut self, + root: Option<&Hash>, + key: &[u8], + ) -> GenericResult> { + let mut proof: Proof = vec![]; + match root { + None => Ok(None), + Some(root) => self.gen_proof(root, Bits::new(key), &mut proof), + } + } + + fn gen_proof( + &mut self, + root: &[u8], + bits: Bits, + proof: &mut Proof, + ) -> GenericResult> { + let bytes = self.db.get(root)?.expect("bytes"); + let (cell, _) = Node::cells_from_bytes(&bytes, bits.first())?; + let unit = cell.as_ref().expect("gen_proof(): left-unit"); + let n = Bits::len_common_bits(&unit.bits, &bits); + + match n { + n if n == bits.len() => { + proof.push(self.encode_proof(&bytes, bits.first())?); + Ok(Some(proof.to_owned())) + } + n if n == unit.bits.len() => { + proof.push(self.encode_proof(&bytes, bits.first())?); + self.gen_proof(unit.hash, bits.shift(n, false), proof) + } + _ => Ok(None), + } + } + + fn encode_proof(&self, bytes: &[u8], right: bool) -> GenericResult<(bool, Vec)> { + match Node::from_bytes(bytes)? { + Node::Soft(_) => Ok((false, bytes[HASH_LEN..].to_vec())), + Node::Hard(_, _) => { + if right { + Ok((true, [&bytes[..bytes.len() - HASH_LEN - 1], &[0x01]].concat())) + } else { + Ok((false, bytes[HASH_LEN..].to_vec())) + } + } + } + } +} + +/// Verify a MerkleProof with the given root and leaf. 
+pub fn verify_proof(root: Option<&Hash>, leaf: &Hash, proof: Option<&Proof>) -> bool { + match proof { + None => false, + Some(proof) => { + let mut hash = leaf.to_owned(); + proof.iter().rev().for_each(|(right, cut)| { + if *right { + let l = cut.len(); + let o = [&cut[..l - 1], &hash[..], &cut[l - 1..]].concat(); + hash = Monotree::hash_digest(&o); + } else { + let o = [&hash[..], &cut[..]].concat(); + hash = Monotree::hash_digest(&o); + } + }); + root.expect("verify_proof(): root") == &hash + } + } +} diff --git a/src/sdk/src/smt_native/utils.rs b/src/sdk/src/smt_native/utils.rs new file mode 100644 index 000000000..41667825f --- /dev/null +++ b/src/sdk/src/smt_native/utils.rs @@ -0,0 +1,179 @@ +/* This file is part of DarkFi (https://dark.fi) + * + * Copyright (C) 2020-2025 Dyne.org foundation + * Copyright (C) 2021 MONOLOG (Taeho Francis Lim and Jongwhan Lee) MIT License + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +use std::{cmp, ops::Range}; + +use num::{NumCast, PrimInt}; +use rand::Rng; + +use super::{Hash, HASH_LEN}; + +#[macro_export] +/// std::cmp::max() extension for use with multiple arguments. +macro_rules! max { + ($x:expr) => ($x); + ($x:expr, $($e:expr),+) => (cmp::max($x, max!($($e),+))); +} + +#[macro_export] +/// std::cmp::min() extension for use with multiple arguments. +macro_rules! 
min { + ($x:expr) => ($x); + ($x:expr, $($e:expr),+) => (cmp::min($x, min!($($e),+))); +} + +/// Cast from a typed scalar to another based on `num_traits` +pub fn cast(n: T) -> U { + NumCast::from(n).expect("cast(): Numcast") +} + +/// Generate a random byte based on `rand::random`. +pub fn random_byte() -> u8 { + rand::random::() +} + +/// Generate random bytes of the given length. +pub fn random_bytes(n: usize) -> Vec { + (0..n).map(|_| random_byte()).collect() +} + +/// Generate a random `Hash`, byte-array of `HASH_LEN` length. +pub fn random_hash() -> Hash { + slice_to_hash(&random_bytes(HASH_LEN)) +} + +/// Generate a vector of random `Hash` with the given length. +pub fn random_hashes(n: usize) -> Vec { + (0..n).map(|_| random_hash()).collect() +} + +/// Get a fixed length byte-array or `Hash` from slice. +pub fn slice_to_hash(slice: &[u8]) -> Hash { + let mut hash = [0x00; HASH_LEN]; + hash.copy_from_slice(slice); + hash +} + +/// Shuffle a slice using _Fisher-Yates_ algorithm. +pub fn shuffle(slice: &mut [T]) { + let mut rng = rand::thread_rng(); + let s = slice.len(); + (0..s).for_each(|i| { + let q = rng.gen_range(0..s); + slice.swap(i, q); + }); +} + +/// Get sorted indices from unsorted slice. +pub fn get_sorted_indices(slice: &[T], reverse: bool) -> Vec +where + T: Clone + cmp::Ord, +{ + let mut t: Vec<_> = slice.iter().enumerate().collect(); + + if reverse { + t.sort_unstable_by(|(_, a), (_, b)| b.cmp(a)); + } else { + t.sort_unstable_by(|(_, a), (_, b)| a.cmp(b)); + } + + t.iter().map(|(i, _)| *i).collect() +} + +/// Get length of the longest common prefix bits for the given two slices. +pub fn len_lcp(a: &[u8], m: &Range, b: &[u8], n: &Range) -> T +where + T: PrimInt + NumCast, + Range: Iterator, +{ + let count = (cast(0)..min!(m.end - m.start, n.end - n.start)) + .take_while(|&i| bit(a, m.start + i) == bit(b, n.start + i)) + .count(); + cast(count) +} + +/// Get `i`-th bit from bytes slice. Index `i` starts from 0. 
+pub fn bit(bytes: &[u8], i: T) -> bool { + let q = i.to_usize().expect("bit(): usize") / 8; + let r = i.to_u8().expect("bit(): u8") % 8; + (bytes[q] >> (7 - r)) & 0x01 == 0x01 +} + +/// Get the required length of bytes from a `Range`, bits indices across the bytes. +pub fn nbytes_across(start: T, end: T) -> T { + let n = (end - (start - start % cast(8))) / cast(8); + + if end % cast(8) == cast(0) { + n + } else { + n + cast(1) + } +} + +/// Adjust the bytes representation for `Bits` when shifted. +/// Returns a bytes shift, `n` and thereby resulting shifted range, `R`. +pub fn offsets(range: &Range, n: T, tail: bool) -> (T, Range) { + let x = range.start + n; + let e: T = cast(8); + if tail { + (nbytes_across(range.start, x), range.start..x) + } else { + (x / e, x % e..range.end - e * (x / e)) + } +} + +/// Convert big-endian bytes into base10 or decimal number. +pub fn bytes_to_int(bytes: &[u8]) -> T { + let l = bytes.len(); + let sum = (0..l).fold(0, |sum, i| sum + (1 << ((l - i - 1) * 8)) * bytes[i] as usize); + cast(sum) +} + +/// Get a compressed bytes (leading-zero-truncated big-endian bytes) from a `u64`. +pub fn int_to_bytes(number: u64) -> Vec { + match number { + 0 => vec![0x00], + _ => number.to_be_bytes().iter().skip_while(|&x| *x == 0x00).copied().collect(), + } +} + +/// Convert a Vec slice of bit or `bool` into a number as `usize`. +pub fn bits_to_usize(bits: &[bool]) -> usize { + let l = bits.len(); + (0..l).fold(0, |sum, i| sum + ((bits[i] as usize) << (l - 1 - i))) +} + +/// Convert a bytes slice into a Vec of bit. +pub fn bytes_to_bits(bytes: &[u8]) -> Vec { + bytes_to_slicebit(bytes, &(0..bytes.len() * 8)) +} + +/// Convert (bytes slice + Range) representation into bits in forms of `Vec`. +pub fn bytes_to_slicebit(bytes: &[u8], range: &Range) -> Vec +where + T: PrimInt + NumCast, + Range: Iterator, +{ + range.clone().map(|x| bit(bytes, x)).collect() +} + +/// Convert bits, Vec slice of `bool` into bytes, `Vec`. 
///
/// Bits are grouped into bytes starting from the END of the slice, so when
/// `bits.len()` is not a multiple of 8 the first output byte holds the leftover
/// leading bits, right-aligned. Within each group the first bit is the most
/// significant one. An empty slice yields an empty vector.
pub fn bits_to_bytes(bits: &[bool]) -> Vec<u8> {
    bits.rchunks(8)
        .rev()
        // Each chunk has at most 8 bits, so folding into a `u8` cannot overflow.
        .map(|chunk| chunk.iter().fold(0u8, |byte, &b| (byte << 1) | u8::from(b)))
        .collect()
}