From 1bc07fad8e2b04978d32289c850e0eb17afa8a06 Mon Sep 17 00:00:00 2001 From: Arsenii Kulikov Date: Wed, 14 Jan 2026 19:31:11 +0000 Subject: [PATCH] perf: use binary search in `ForwardInMemoryCursor` (#21049) --- crates/trie/trie/src/forward_cursor.rs | 93 +++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 9 deletions(-) diff --git a/crates/trie/trie/src/forward_cursor.rs b/crates/trie/trie/src/forward_cursor.rs index 5abb5e2431..e6a98a6186 100644 --- a/crates/trie/trie/src/forward_cursor.rs +++ b/crates/trie/trie/src/forward_cursor.rs @@ -53,9 +53,13 @@ impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> { } } +/// Threshold for remaining entries above which binary search is used instead of linear scan. +/// For small slices, linear scan has better cache locality and lower overhead. +const BINARY_SEARCH_THRESHOLD: usize = 64; + impl<K, V> ForwardInMemoryCursor<'_, K, V> where - K: PartialOrd + Clone, + K: Ord + Clone, V: Clone, { /// Returns the first entry from the current cursor position that's greater or equal to the @@ -73,19 +77,22 @@ where /// Advances the cursor forward while `predicate` returns `true` or until the collection is /// exhausted. /// + /// Uses binary search for large remaining slices (>= 64 entries), linear scan for small ones. + /// /// Returns the first entry for which `predicate` returns `false` or `None`. The cursor will /// point to the returned entry.
fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<(K, V)> { - let mut entry; - loop { - entry = self.current(); - if entry.is_some_and(|(k, _)| predicate(k)) { + let remaining = self.entries.len().saturating_sub(self.idx); + if remaining >= BINARY_SEARCH_THRESHOLD { + let slice = &self.entries[self.idx..]; + let pos = slice.partition_point(|(k, _)| predicate(k)); + self.idx += pos; + } else { + while self.current().is_some_and(|(k, _)| predicate(k)) { self.next(); - } else { - break; } } - entry.cloned() + self.current().cloned() } } @@ -94,7 +101,7 @@ mod tests { use super::*; #[test] - fn test_cursor() { + fn test_cursor_small() { let mut cursor = ForwardInMemoryCursor::new(&[(1, ()), (2, ()), (3, ()), (4, ()), (5, ())]); assert_eq!(cursor.current(), Some(&(1, ()))); @@ -113,4 +120,72 @@ mod tests { assert_eq!(cursor.seek(&6), None); assert_eq!(cursor.current(), None); } + + #[test] + fn test_cursor_large_binary_search() { + // Create a large enough collection to trigger binary search + let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); + let mut cursor = ForwardInMemoryCursor::new(&entries); + + // Seek to beginning + assert_eq!(cursor.seek(&0), Some((0, ()))); + assert_eq!(cursor.idx, 0); + + // Seek to middle (should use binary search) + assert_eq!(cursor.seek(&100), Some((100, ()))); + assert_eq!(cursor.idx, 50); + + // Seek to non-existent key (should find next greater) + assert_eq!(cursor.seek(&101), Some((102, ()))); + assert_eq!(cursor.idx, 51); + + // Seek to end + assert_eq!(cursor.seek(&398), Some((398, ()))); + assert_eq!(cursor.idx, 199); + + // Seek past end + assert_eq!(cursor.seek(&1000), None); + } + + #[test] + fn test_first_after_large() { + let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 2, ())).collect(); + let mut cursor = ForwardInMemoryCursor::new(&entries); + + // first_after should find strictly greater + assert_eq!(cursor.first_after(&0), Some((2, ()))); + assert_eq!(cursor.idx, 1); + + // 
Reset and test from beginning + cursor.reset(); + assert_eq!(cursor.first_after(&99), Some((100, ()))); + + // first_after on exact match + cursor.reset(); + assert_eq!(cursor.first_after(&100), Some((102, ()))); + } + + #[test] + fn test_cursor_consistency() { + // Verify binary search and linear scan produce same results + let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 3, ())).collect(); + + for search_key in [0, 1, 3, 50, 150, 299, 300, 597, 598, 599, 1000] { + // Test with fresh cursor (binary search path) + let mut cursor1 = ForwardInMemoryCursor::new(&entries); + let result1 = cursor1.seek(&search_key); + + // Pre-seek close to the key; the linear path is hit only once < 64 entries remain + let mut cursor2 = ForwardInMemoryCursor::new(&entries); + if search_key > 100 { + cursor2.seek(&(search_key - 50)); + } + let result2 = cursor2.seek(&search_key); + + assert_eq!( + result1, result2, + "Mismatch for key {search_key}: binary={result1:?}, linear={result2:?}" + ); + } + } }