perf: use binary search in ForwardInMemoryCursor (#21049)

This commit is contained in:
Arsenii Kulikov
2026-01-14 19:31:11 +00:00
committed by GitHub
parent 8cb506c4d3
commit 1bc07fad8e

View File

@@ -53,9 +53,13 @@ impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> {
}
}
/// Minimum number of remaining entries (inclusive) at which `advance_while` switches from a
/// linear scan to a binary search; the comparison in the code is `remaining >= 64`.
///
/// Below this size a linear scan is used instead: for small slices it has better cache
/// locality and lower overhead.
const BINARY_SEARCH_THRESHOLD: usize = 64;
impl<K, V> ForwardInMemoryCursor<'_, K, V>
where
K: PartialOrd + Clone,
K: Ord + Clone,
V: Clone,
{
/// Returns the first entry from the current cursor position that's greater or equal to the
@@ -73,19 +77,22 @@ where
/// Advances the cursor forward while `predicate` returns `true` or until the collection is
/// exhausted.
///
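/// Uses binary search (`partition_point`) when at least 64 entries remain, and a linear scan
/// otherwise. The binary search is only meaningful if `predicate` is monotone over the
/// remaining entries: `true` for a (possibly empty) prefix and `false` for everything after it.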
///
/// Returns the first entry for which `predicate` returns `false` or `None`. The cursor will
/// point to the returned entry.
fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<(K, V)> {
let mut entry;
loop {
entry = self.current();
if entry.is_some_and(|(k, _)| predicate(k)) {
let remaining = self.entries.len().saturating_sub(self.idx);
if remaining >= BINARY_SEARCH_THRESHOLD {
let slice = &self.entries[self.idx..];
let pos = slice.partition_point(|(k, _)| predicate(k));
self.idx += pos;
} else {
while self.current().is_some_and(|(k, _)| predicate(k)) {
self.next();
} else {
break;
}
}
entry.cloned()
self.current().cloned()
}
}
@@ -94,7 +101,7 @@ mod tests {
use super::*;
#[test]
fn test_cursor() {
fn test_cursor_small() {
let mut cursor = ForwardInMemoryCursor::new(&[(1, ()), (2, ()), (3, ()), (4, ()), (5, ())]);
assert_eq!(cursor.current(), Some(&(1, ())));
@@ -113,4 +120,72 @@ mod tests {
assert_eq!(cursor.seek(&6), None);
assert_eq!(cursor.current(), None);
}
#[test]
fn test_cursor_large_binary_search() {
    // 200 even keys (0, 2, ..., 398): more entries than `BINARY_SEARCH_THRESHOLD`, so forward
    // seeks from the start are expected to take the binary search path.
    let entries: Vec<(i32, ())> = (0..400).step_by(2).map(|key| (key, ())).collect();
    let mut cursor = ForwardInMemoryCursor::new(&entries);

    // Each row: (key to seek, entry expected back, cursor index expected afterwards).
    // The rows run in order on the same cursor, so every seek starts where the previous
    // one stopped.
    let cases = [
        // First key: the cursor does not move.
        (0, (0, ()), 0),
        // Exact hit in the middle of the collection.
        (100, (100, ()), 50),
        // Absent key: lands on the next greater one.
        (101, (102, ()), 51),
        // Last key in the collection.
        (398, (398, ()), 199),
    ];
    for (key, expected_entry, expected_idx) in cases {
        assert_eq!(cursor.seek(&key), Some(expected_entry));
        assert_eq!(cursor.idx, expected_idx);
    }

    // A key beyond the largest entry yields nothing.
    assert_eq!(cursor.seek(&1000), None);
}
#[test]
fn test_first_after_large() {
    // 200 even keys: 0, 2, ..., 398.
    let entries: Vec<(i32, ())> = (0..400).step_by(2).map(|key| (key, ())).collect();
    let mut cursor = ForwardInMemoryCursor::new(&entries);

    // `first_after` is exclusive: asking for the entry after an existing key skips that key.
    let after_zero = cursor.first_after(&0);
    assert_eq!(after_zero, Some((2, ())));
    assert_eq!(cursor.idx, 1);

    // Each remaining lookup starts from a reset cursor.
    // 99 is absent, so the next greater key (100) is returned; 100 is present, so it is
    // skipped in favour of 102.
    for (key, expected_key) in [(99, 100), (100, 102)] {
        cursor.reset();
        assert_eq!(cursor.first_after(&key), Some((expected_key, ())));
    }
}
#[test]
fn test_cursor_consistency() {
    // Checks that `seek` returns the same answer on the binary search path and on the linear
    // scan path, and that both agree with an independent reference computed straight from
    // the data.
    //
    // NOTE(review): this relies on `seek` delegating to `advance_while`, which is where the
    // path selection happens — confirm against the `seek` implementation.
    let entries: Vec<(i32, ())> = (0..200).map(|i| (i * 3, ())).collect();
    assert!(
        entries.len() >= BINARY_SEARCH_THRESHOLD,
        "fixture must be large enough to trigger binary search"
    );

    for search_key in [0, 1, 3, 50, 150, 299, 300, 597, 598, 599, 1000] {
        // Reference answer: first entry whose key is >= `search_key`, found without the cursor.
        let expected = entries.iter().find(|(k, _)| *k >= search_key).copied();

        // Binary search path: a fresh cursor has all 200 entries remaining.
        let mut binary_cursor = ForwardInMemoryCursor::new(&entries);
        assert_eq!(
            binary_cursor.seek(&search_key),
            expected,
            "binary search path returned a wrong entry for key {search_key}"
        );

        // Linear scan path: a cursor over a window that is strictly shorter than the
        // threshold. The window starts a few entries before the expected answer so the scan
        // has to advance, and it still contains the answer (when one exists).
        let answer_idx = entries.partition_point(|(k, _)| *k < search_key);
        let start = answer_idx.saturating_sub(10);
        let end = (start + BINARY_SEARCH_THRESHOLD - 1).min(entries.len());
        let window = entries[start..end].to_vec();
        assert!(
            window.len() < BINARY_SEARCH_THRESHOLD,
            "window must be small enough to force the linear scan"
        );

        let mut linear_cursor = ForwardInMemoryCursor::new(&window);
        assert_eq!(
            linear_cursor.seek(&search_key),
            expected,
            "linear scan path returned a wrong entry for key {search_key}"
        );
    }
}
}