From 503748b0f7b3d32ff2649646835702bb629a2403 Mon Sep 17 00:00:00 2001
From: Roman Krasiuk
Date: Wed, 26 Apr 2023 19:54:49 +0300
Subject: [PATCH] bench(trie): prefix set (#2406)

---
 Cargo.lock                        |   1 +
 crates/trie/Cargo.toml            |   7 +-
 crates/trie/benches/prefix_set.rs | 321 ++++++++++++++++++++++++++++++
 3 files changed, 328 insertions(+), 1 deletion(-)
 create mode 100644 crates/trie/benches/prefix_set.rs

diff --git a/Cargo.lock b/Cargo.lock
index f2dc6ad75c..671b8858d0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5463,6 +5463,7 @@ dependencies = [
 name = "reth-trie"
 version = "0.1.0"
 dependencies = [
+ "criterion",
  "derive_more",
  "hex",
  "proptest",
diff --git a/crates/trie/Cargo.toml b/crates/trie/Cargo.toml
index 83962a1e9e..75ecc4c4b4 100644
--- a/crates/trie/Cargo.toml
+++ b/crates/trie/Cargo.toml
@@ -43,6 +43,11 @@ triehash = "0.8"
 proptest = "1.0"
 tokio = { version = "1.21.2", default-features = false, features = ["sync", "rt", "macros"] }
 tokio-stream = "0.1.10"
+criterion = "0.4"
 
 [features]
-test-utils = ["triehash"]
\ No newline at end of file
+test-utils = ["triehash"]
+
+[[bench]]
+name = "prefix_set"
+harness = false
diff --git a/crates/trie/benches/prefix_set.rs b/crates/trie/benches/prefix_set.rs
new file mode 100644
index 0000000000..022d20cef8
--- /dev/null
+++ b/crates/trie/benches/prefix_set.rs
@@ -0,0 +1,321 @@
+//! Benchmarks comparing candidate data structures for prefix set lookups.
+//!
+//! Each candidate implements [`PrefixSetAbstraction`] and is driven through the same generated
+//! workload.
+
+#![allow(dead_code, unused_imports, non_snake_case)]
+
+use criterion::{
+    black_box, criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion,
+};
+use proptest::{
+    arbitrary::Arbitrary,
+    prelude::*,
+    strategy::{Strategy, ValueTree},
+    test_runner::{basic_result_cache, TestRunner},
+};
+use reth_db::{
+    cursor::{DbCursorRW, DbDupCursorRO, DbDupCursorRW},
+    mdbx::Env,
+    TxHashNumber,
+};
+use reth_primitives::H256;
+use reth_trie::{prefix_set::PrefixSet, Nibbles};
+use std::{
+    collections::{BTreeSet, HashSet},
+    time::Instant,
+};
+
+/// Minimal interface every benchmarked prefix set candidate provides.
+///
+/// `contains` takes `&mut self` so that implementations may sort lazily or keep a cursor
+/// between lookups.
+pub trait PrefixSetAbstraction: Default {
+    /// Adds `key` to the set.
+    fn insert(&mut self, key: Nibbles);
+    /// Returns `true` if any key in the set has `key` as a prefix.
+    fn contains(&mut self, key: Nibbles) -> bool;
+}
+
+impl PrefixSetAbstraction for PrefixSet {
+    fn insert(&mut self, key: Nibbles) {
+        self.insert(key)
+    }
+
+    fn contains(&mut self, key: Nibbles) -> bool {
+        PrefixSet::contains(&self, key)
+    }
+}
+
+/// Benchmarks lookups of four candidate implementations for several preload sizes.
+pub fn prefix_set_lookups(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Prefix Set Lookups");
+
+    for size in [10, 100, 1_000, 10_000] {
+        let test_data = generate_test_data(size);
+
+        use implementations::*;
+        prefix_set_bench::<BTreeAnyPrefixSet>(
+            &mut group,
+            "`BTreeSet` with `Iterator::any` lookup",
+            test_data.clone(),
+        );
+        prefix_set_bench::<BTreeRangeLastCheckedPrefixSet>(
+            &mut group,
+            "`BTreeSet` with `BTreeSet::range` lookup",
+            test_data.clone(),
+        );
+        prefix_set_bench::<VecCursorPrefixSet>(
+            &mut group,
+            "`Vec` with custom cursor lookup",
+            test_data.clone(),
+        );
+        prefix_set_bench::<VecBinarySearchPrefixSet>(
+            &mut group,
+            "`Vec` with binary search lookup",
+            test_data.clone(),
+        );
+    }
+
+    // Finish explicitly so the group's summary is generated here rather than on drop.
+    group.finish();
+}
+
+/// Benchmarks `T::contains` over `input` after inserting every key of `preload`.
+///
+/// The set is rebuilt in the setup closure for each iteration, and every lookup result is
+/// checked against `expected`.
+fn prefix_set_bench<T: PrefixSetAbstraction>(
+    group: &mut BenchmarkGroup<WallTime>,
+    description: &str,
+    (preload, input, expected): (Vec<Nibbles>, Vec<Nibbles>, Vec<bool>),
+) {
+    let setup = || {
+        let mut prefix_set = T::default();
+        for key in preload.iter() {
+            prefix_set.insert(key.clone());
+        }
+        (prefix_set, input.clone(), expected.clone())
+    };
+
+    let group_id = format!(
+        "prefix set | preload size: {} | input size: {} | {}",
+        preload.len(),
+        input.len(),
+        description
+    );
+    group.bench_function(group_id, |b| {
+        b.iter_with_setup(setup, |(mut prefix_set, input, expected)| {
+            for (idx, key) in input.into_iter().enumerate() {
+                let result = black_box(prefix_set.contains(key));
+                assert_eq!(result, expected[idx]);
+            }
+        });
+    });
+}
+
+/// Generates `(preload, input, expected)` for a lookup benchmark.
+///
+/// `preload` holds up to `size` sorted, distinct 32-byte keys, `input` holds up to `size`
+/// sorted, distinct prefixes of 0 to 32 bytes, and `expected[i]` tells whether any preloaded
+/// key has `input[i]` as a prefix.
+fn generate_test_data(size: usize) -> (Vec<Nibbles>, Vec<Nibbles>, Vec<bool>) {
+    use prop::collection::vec;
+
+    let config = ProptestConfig { result_cache: basic_result_cache, ..Default::default() };
+    let mut runner = TestRunner::new(config);
+
+    let mut preload = vec(vec(any::<u8>(), 32), size).new_tree(&mut runner).unwrap().current();
+    // `dedup` only removes consecutive duplicates, so it has to run on sorted data.
+    preload.sort();
+    preload.dedup();
+    let preload = preload.into_iter().map(|hash| Nibbles::from(&hash[..])).collect::<Vec<_>>();
+
+    let mut input = vec(vec(any::<u8>(), 0..=32), size).new_tree(&mut runner).unwrap().current();
+    input.sort();
+    input.dedup();
+    let input = input.into_iter().map(|bytes| Nibbles::from(&bytes[..])).collect::<Vec<_>>();
+
+    let expected = input
+        .iter()
+        .map(|prefix| preload.iter().any(|key| key.has_prefix(prefix)))
+        .collect::<Vec<_>>();
+    (preload, input, expected)
+}
+
+criterion_group!(prefix_set, prefix_set_lookups);
+criterion_main!(prefix_set);
+
+/// Candidate prefix set implementations compared by the benchmark.
+mod implementations {
+    use super::*;
+    use std::{
+        collections::btree_set::Range, iter::Peekable, marker::PhantomPinned, ops::Bound, pin::Pin,
+        ptr::NonNull,
+    };
+
+    /// Keys in a `BTreeSet`; lookups scan the keys with `Iterator::any`.
+    #[derive(Default)]
+    pub struct BTreeAnyPrefixSet {
+        keys: BTreeSet<Nibbles>,
+    }
+
+    impl PrefixSetAbstraction for BTreeAnyPrefixSet {
+        fn contains(&mut self, key: Nibbles) -> bool {
+            self.keys.iter().any(|k| k.has_prefix(&key))
+        }
+
+        fn insert(&mut self, key: Nibbles) {
+            self.keys.insert(key);
+        }
+    }
+
+    /// Keys in a `BTreeSet`; lookups walk a range that starts at the previously checked prefix.
+    #[derive(Default)]
+    pub struct BTreeRangeLastCheckedPrefixSet {
+        keys: BTreeSet<Nibbles>,
+        last_checked: Option<Nibbles>,
+    }
+
+    impl PrefixSetAbstraction for BTreeRangeLastCheckedPrefixSet {
+        fn contains(&mut self, prefix: Nibbles) -> bool {
+            let range: (Bound<&Nibbles>, Bound<&Nibbles>) = match self.last_checked.as_ref() {
+                // Presumably never hit, as the benchmark feeds prefixes in ascending order.
+                // Start at `prefix` itself: capping the range at `last` would skip keys that
+                // extend `prefix` but sort after `last`.
+                Some(last) if &prefix < last => (Bound::Included(&prefix), Bound::Unbounded),
+                Some(last) => (Bound::Included(last), Bound::Unbounded),
+                None => (Bound::Unbounded, Bound::Unbounded),
+            };
+            for key in self.keys.range(range) {
+                if key.has_prefix(&prefix) {
+                    self.last_checked = Some(prefix);
+                    return true
+                }
+
+                if key > &prefix {
+                    self.last_checked = Some(prefix);
+                    return false
+                }
+            }
+
+            false
+        }
+
+        fn insert(&mut self, key: Nibbles) {
+            self.keys.insert(key);
+        }
+    }
+
+    /// Keys in a lazily sorted `Vec`; lookups use binary search.
+    #[derive(Default)]
+    pub struct VecBinarySearchPrefixSet {
+        keys: Vec<Nibbles>,
+        sorted: bool,
+    }
+
+    impl PrefixSetAbstraction for VecBinarySearchPrefixSet {
+        fn contains(&mut self, prefix: Nibbles) -> bool {
+            if !self.sorted {
+                self.keys.sort();
+                self.sorted = true;
+            }
+
+            match self.keys.binary_search(&prefix) {
+                Ok(_) => true,
+                Err(idx) => match self.keys.get(idx) {
+                    Some(key) => key.has_prefix(&prefix),
+                    None => false, // prefix > last key
+                },
+            }
+        }
+
+        fn insert(&mut self, key: Nibbles) {
+            self.sorted = false;
+            self.keys.push(key);
+        }
+    }
+
+    /// Keys in a lazily sorted `Vec`; lookups scan forward from a cursor kept between calls.
+    #[derive(Default)]
+    pub struct VecCursorPrefixSet {
+        keys: Vec<Nibbles>,
+        sorted: bool,
+        index: usize,
+    }
+
+    impl PrefixSetAbstraction for VecCursorPrefixSet {
+        fn contains(&mut self, prefix: Nibbles) -> bool {
+            if !self.sorted {
+                self.keys.sort();
+                self.sorted = true;
+            }
+
+            let prefix = prefix.into();
+
+            while self.index > 0 && self.keys[self.index] > prefix {
+                self.index -= 1;
+            }
+
+            for (idx, key) in self.keys[self.index..].iter().enumerate() {
+                if key.has_prefix(&prefix) {
+                    self.index += idx;
+                    return true
+                }
+
+                if key > &prefix {
+                    self.index += idx;
+                    return false
+                }
+            }
+
+            false
+        }
+
+        fn insert(&mut self, nibbles: Nibbles) {
+            self.sorted = false;
+            self.keys.push(nibbles.into());
+        }
+    }
+
+    /// Keys in a lazily sorted `Vec`; lookups binary search the tail that starts at the index
+    /// remembered from the previous lookup.
+    #[derive(Default)]
+    pub struct VecBinarySearchWithLastFoundPrefixSet {
+        keys: Vec<Nibbles>,
+        last_found_idx: usize,
+        sorted: bool,
+    }
+
+    impl PrefixSetAbstraction for VecBinarySearchWithLastFoundPrefixSet {
+        fn contains(&mut self, prefix: Nibbles) -> bool {
+            if !self.sorted {
+                self.keys.sort();
+                self.sorted = true;
+            }
+
+            while self.last_found_idx > 0 && self.keys[self.last_found_idx] > prefix {
+                self.last_found_idx -= 1;
+            }
+
+            // `binary_search` runs on a tail slice, so its index is relative to `start` and has
+            // to be shifted back before it is used on `self.keys`.
+            let start = self.last_found_idx;
+            match self.keys[start..].binary_search(&prefix) {
+                Ok(_) => true,
+                Err(idx) => match self.keys.get(start + idx) {
+                    Some(key) => {
+                        self.last_found_idx = start + idx;
+                        key.has_prefix(&prefix)
+                    }
+                    None => false, // prefix > last key
+                },
+            }
+        }
+
+        fn insert(&mut self, key: Nibbles) {
+            self.sorted = false;
+            self.keys.push(key);
+        }
+    }
+}