diff --git a/crates/storage/provider/src/providers/mod.rs b/crates/storage/provider/src/providers/mod.rs index ae8cef19ef..c2613caaf7 100644 --- a/crates/storage/provider/src/providers/mod.rs +++ b/crates/storage/provider/src/providers/mod.rs @@ -16,8 +16,8 @@ pub use static_file::{ mod state; pub use state::{ historical::{ - find_changeset_block_from_index, HistoricalStateProvider, HistoricalStateProviderRef, HistoryInfo, - LowestAvailableBlocks, + find_changeset_block_from_index, HistoricalStateProvider, HistoricalStateProviderRef, + HistoryInfo, LowestAvailableBlocks, }, latest::{LatestStateProvider, LatestStateProviderRef}, overlay::{OverlayStateProvider, OverlayStateProviderFactory}, diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index a4da086092..fd1cbd0e4e 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -50,60 +50,6 @@ pub enum HistoryInfo { MaybeInPlainState, } -/// Computes [`HistoryInfo`] from a shard chunk using rank/select. -/// -/// This is the core algorithm shared by both MDBX and `RocksDB` backends. -/// It determines where to look for the value at a given block number based on the -/// history index stored in a [`BlockNumberList`] (which wraps a `RoaringTreemap`). -/// -/// # Arguments -/// * `chunk` - The block number list from the shard -/// * `block_number` - Target block to look up -/// * `has_previous_shard` - Whether there's a shard before this one for the same key -/// * `lowest_available` - Lowest block where history is available (pruning boundary) -pub fn find_changeset_block_from_index( - chunk: &BlockNumberList, - block_number: BlockNumber, - has_previous_shard: bool, - lowest_available: Option<BlockNumber>, -) -> HistoryInfo { - // Get the rank of the first entry before or equal to our block. 
- let mut rank = chunk.rank(block_number); - - // Adjust the rank, so that we have the rank of the first entry strictly before our - // block (not equal to it). - if rank.checked_sub(1).and_then(|r| chunk.select(r)) == Some(block_number) { - rank -= 1; - } - - let found_block = chunk.select(rank); - - // If our block is before the first entry in the index chunk and this first entry - // doesn't equal to our block, it might be before the first write ever. To check, we - // look at the previous entry and check if the key is the same. - // This check is worth it, the `cursor.prev()` check is rarely triggered (the if will - // short-circuit) and when it passes we save a full seek into the changeset/plain state - // table. - if rank == 0 && found_block != Some(block_number) && !has_previous_shard { - if let (Some(_), Some(bn)) = (lowest_available, found_block) { - // The key may have been written, but due to pruning we may not have changesets - // and history, so we need to make a changeset lookup. - return HistoryInfo::InChangeset(bn); - } - // The key is written to, but only after our block. - return HistoryInfo::NotYetWritten; - } - - if let Some(block_number) = found_block { - // The chunk contains an entry for a write after our block, return it. - HistoryInfo::InChangeset(block_number) - } else { - // The chunk does not contain an entry for a write after our block. This can only - // happen if this is the last chunk and so we need to look in the plain state. - HistoryInfo::InPlainState - } -} - /// State provider for a given block number which takes a tx reference. /// /// Historical state provider accesses the state at the start of the provided block number. @@ -234,13 +180,30 @@ impl<'b, Provider: DBProvider + BlockNumReader> HistoricalStateProviderRef<'b, P // index, the first chunk for the next key will be returned so we filter out chunks that // have a different key. 
if let Some(chunk) = cursor.seek(key)?.filter(|(key, _)| key_filter(key)).map(|x| x.1) { - // Check if there's a previous shard for the same key - let has_previous_shard = cursor.prev()?.is_some_and(|(key, _)| key_filter(&key)); + // Get the rank of the first entry before or equal to our block. + let mut rank = chunk.rank(self.block_number); + + // Adjust the rank, so that we have the rank of the first entry strictly before our + // block (not equal to it). + if rank.checked_sub(1).and_then(|r| chunk.select(r)) == Some(self.block_number) { + rank -= 1; + } + + let found_block = chunk.select(rank); + + // If our block is before the first entry in the index chunk and this first entry + // doesn't equal to our block, it might be before the first write ever. To check, we + // look at the previous entry and check if the key is the same. + // This check is worth it, the `cursor.prev()` check is rarely triggered (the if will + // short-circuit) and when it passes we save a full seek into the changeset/plain state + // table. + let is_before_first_write = + needs_prev_shard_check(rank, found_block, self.block_number) && + !cursor.prev()?.is_some_and(|(key, _)| key_filter(&key)); Ok(find_changeset_block_from_index( - &chunk, - self.block_number, - has_previous_shard, + found_block, + is_before_first_write, lowest_available_block_number, )) } else if lowest_available_block_number.is_some() { @@ -559,6 +522,49 @@ impl LowestAvailableBlocks { } } +/// Checks if a previous shard lookup is needed to determine if we're before the first write. +/// +/// Returns `true` when `rank == 0` (first entry in shard) and the found block doesn't match +/// the target block number. In this case, we need to check if there's a previous shard. +fn needs_prev_shard_check(rank: u64, found_block: Option<u64>, block_number: BlockNumber) -> bool { + rank == 0 && found_block != Some(block_number) +} + +/// Determines where to find the historical value based on computed shard lookup results. 
+/// +/// This is a pure function shared by both MDBX and `RocksDB` backends. +/// +/// # Arguments +/// * `found_block` - The block number from the shard lookup +/// * `is_before_first_write` - True if the target block is before the first write to this key. This +/// should be computed as: `rank == 0 && found_block != Some(block_number) && !has_previous_shard` +/// where `has_previous_shard` comes from a lazy `cursor.prev()` check. +/// * `lowest_available` - Lowest block where history is available (pruning boundary) +pub const fn find_changeset_block_from_index( + found_block: Option<u64>, + is_before_first_write: bool, + lowest_available: Option<BlockNumber>, +) -> HistoryInfo { + if is_before_first_write { + if let (Some(_), Some(block_number)) = (lowest_available, found_block) { + // The key may have been written, but due to pruning we may not have changesets + // and history, so we need to make a changeset lookup. + return HistoryInfo::InChangeset(block_number) + } + // The key is written to, but only after our block. + return HistoryInfo::NotYetWritten + } + + if let Some(block_number) = found_block { + // The chunk contains an entry for a write after our block, return it. + HistoryInfo::InChangeset(block_number) + } else { + // The chunk does not contain an entry for a write after our block. This can only + // happen if this is the last chunk and so we need to look in the plain state. + HistoryInfo::InPlainState + } +} + #[cfg(test)] mod tests { use crate::{