From 905bb95f8bff2aee999197f511f99b4aebf2dddb Mon Sep 17 00:00:00 2001 From: Matthias Seitz Date: Fri, 16 Jan 2026 19:25:04 +0100 Subject: [PATCH] perf(engine): defer trie overlay computation with LazyOverlay (#21133) --- crates/chain-state/src/lazy_overlay.rs | 231 ++++++++++++++++++ crates/chain-state/src/lib.rs | 3 + crates/engine/tree/src/tree/metrics.rs | 4 - .../engine/tree/src/tree/payload_validator.rs | 165 ++----------- .../provider/src/providers/state/overlay.rs | 152 ++++++++---- 5 files changed, 366 insertions(+), 189 deletions(-) create mode 100644 crates/chain-state/src/lazy_overlay.rs diff --git a/crates/chain-state/src/lazy_overlay.rs b/crates/chain-state/src/lazy_overlay.rs new file mode 100644 index 0000000000..a0295c9a5b --- /dev/null +++ b/crates/chain-state/src/lazy_overlay.rs @@ -0,0 +1,231 @@ +//! Lazy overlay computation for trie input. +//! +//! This module provides [`LazyOverlay`], a type that computes the [`TrieInputSorted`] +//! lazily on first access. This allows execution to start before the trie overlay +//! is fully computed. + +use crate::DeferredTrieData; +use alloy_primitives::B256; +use reth_trie::{updates::TrieUpdatesSorted, HashedPostStateSorted, TrieInputSorted}; +use std::sync::{Arc, OnceLock}; +use tracing::{debug, trace}; + +/// Threshold for switching from `extend_ref` loop to `merge_batch`. +/// +/// Benchmarked crossover: `extend_ref` wins up to ~64 blocks, `merge_batch` wins beyond. +const MERGE_BATCH_THRESHOLD: usize = 64; + +/// Inputs captured for lazy overlay computation. +#[derive(Clone)] +struct LazyOverlayInputs { + /// The persisted ancestor hash (anchor) this overlay should be built on. + anchor_hash: B256, + /// Deferred trie data handles for all in-memory blocks (newest to oldest). + blocks: Vec, +} + +/// Lazily computed trie overlay. +/// +/// Captures the inputs needed to compute a [`TrieInputSorted`] and defers the actual +/// computation until first access. This is conceptually similar to [`DeferredTrieData`] +/// but for overlay computation. +/// +/// # Fast Path vs Slow Path +/// +/// - **Fast path**: If the tip block's cached `anchored_trie_input` is ready and its `anchor_hash` +/// matches our expected anchor, we can reuse it directly (O(1)). +/// - **Slow path**: Otherwise, we merge all ancestor blocks' trie data into a new overlay. +#[derive(Clone)] +pub struct LazyOverlay { + /// Computed result, cached after first access. + inner: Arc>, + /// Inputs for lazy computation. + inputs: LazyOverlayInputs, +} + +impl std::fmt::Debug for LazyOverlay { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LazyOverlay") + .field("anchor_hash", &self.inputs.anchor_hash) + .field("num_blocks", &self.inputs.blocks.len()) + .field("computed", &self.inner.get().is_some()) + .finish() + } +} + +impl LazyOverlay { + /// Create a new lazy overlay with the given anchor hash and block handles. + /// + /// # Arguments + /// + /// * `anchor_hash` - The persisted ancestor hash this overlay is built on top of + /// * `blocks` - Deferred trie data handles for in-memory blocks (newest to oldest) + pub fn new(anchor_hash: B256, blocks: Vec) -> Self { + Self { inner: Arc::new(OnceLock::new()), inputs: LazyOverlayInputs { anchor_hash, blocks } } + } + + /// Returns the anchor hash this overlay is built on. + pub const fn anchor_hash(&self) -> B256 { + self.inputs.anchor_hash + } + + /// Returns the number of in-memory blocks this overlay covers. + pub const fn num_blocks(&self) -> usize { + self.inputs.blocks.len() + } + + /// Returns true if the overlay has already been computed. + pub fn is_computed(&self) -> bool { + self.inner.get().is_some() + } + + /// Returns the computed trie input, computing it if necessary. + /// + /// The first call triggers computation (which may block waiting for deferred data). + /// Subsequent calls return the cached result immediately. + pub fn get(&self) -> &TrieInputSorted { + self.inner.get_or_init(|| self.compute()) + } + + /// Returns the overlay as (nodes, state) tuple for use with `OverlayStateProviderFactory`. + pub fn as_overlay(&self) -> (Arc, Arc) { + let input = self.get(); + (Arc::clone(&input.nodes), Arc::clone(&input.state)) + } + + /// Compute the trie input overlay. + fn compute(&self) -> TrieInputSorted { + let anchor_hash = self.inputs.anchor_hash; + let blocks = &self.inputs.blocks; + + if blocks.is_empty() { + debug!(target: "chain_state::lazy_overlay", "No in-memory blocks, returning empty overlay"); + return TrieInputSorted::default(); + } + + // Fast path: Check if tip block's overlay is ready and anchor matches. + // The tip block (first in list) has the cumulative overlay from all ancestors. + if let Some(tip) = blocks.first() { + let data = tip.wait_cloned(); + if let Some(anchored) = &data.anchored_trie_input { + if anchored.anchor_hash == anchor_hash { + trace!(target: "chain_state::lazy_overlay", %anchor_hash, "Reusing tip block's cached overlay (fast path)"); + return (*anchored.trie_input).clone(); + } + debug!( + target: "chain_state::lazy_overlay", + computed_anchor = %anchored.anchor_hash, + %anchor_hash, + "Anchor mismatch, falling back to merge" + ); + } + } + + // Slow path: Merge all blocks' trie data into a new overlay. + debug!(target: "chain_state::lazy_overlay", num_blocks = blocks.len(), "Merging blocks (slow path)"); + Self::merge_blocks(blocks) + } + + /// Merge all blocks' trie data into a single [`TrieInputSorted`]. + /// + /// Blocks are ordered newest to oldest. We iterate oldest to newest so that + /// newer values override older ones. + fn merge_blocks(blocks: &[DeferredTrieData]) -> TrieInputSorted { + if blocks.is_empty() { + return TrieInputSorted::default(); + } + + // Single block: use its data directly + if blocks.len() == 1 { + let data = blocks[0].wait_cloned(); + return TrieInputSorted { + state: Arc::clone(&data.hashed_state), + nodes: Arc::clone(&data.trie_updates), + prefix_sets: Default::default(), + }; + } + + if blocks.len() < MERGE_BATCH_THRESHOLD { + // Small k: extend_ref loop is faster + // Iterate oldest->newest so newer values override older ones + let mut blocks_iter = blocks.iter().rev(); + let first = blocks_iter.next().expect("blocks is non-empty"); + let data = first.wait_cloned(); + + let mut state = Arc::clone(&data.hashed_state); + let mut nodes = Arc::clone(&data.trie_updates); + let state_mut = Arc::make_mut(&mut state); + let nodes_mut = Arc::make_mut(&mut nodes); + + for block in blocks_iter { + let data = block.wait_cloned(); + state_mut.extend_ref(data.hashed_state.as_ref()); + nodes_mut.extend_ref(data.trie_updates.as_ref()); + } + + TrieInputSorted { state, nodes, prefix_sets: Default::default() } + } else { + // Large k: merge_batch is faster (O(n log k) via k-way merge) + let trie_data: Vec<_> = blocks.iter().map(|b| b.wait_cloned()).collect(); + + let merged_state = HashedPostStateSorted::merge_batch( + trie_data.iter().map(|d| d.hashed_state.as_ref()), + ); + let merged_nodes = + TrieUpdatesSorted::merge_batch(trie_data.iter().map(|d| d.trie_updates.as_ref())); + + TrieInputSorted { + state: Arc::new(merged_state), + nodes: Arc::new(merged_nodes), + prefix_sets: Default::default(), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use reth_trie::{updates::TrieUpdates, HashedPostState}; + + fn empty_deferred(anchor: B256) -> DeferredTrieData { + DeferredTrieData::pending( + Arc::new(HashedPostState::default()), + Arc::new(TrieUpdates::default()), + anchor, + Vec::new(), + ) + } + + #[test] + fn empty_blocks_returns_default() { + let overlay = LazyOverlay::new(B256::ZERO, vec![]); + let result = overlay.get(); + assert!(result.state.is_empty()); + assert!(result.nodes.is_empty()); + } + + #[test] + fn single_block_uses_data_directly() { + let anchor = B256::random(); + let deferred = empty_deferred(anchor); + let overlay = LazyOverlay::new(anchor, vec![deferred]); + + assert!(!overlay.is_computed()); + let _ = overlay.get(); + assert!(overlay.is_computed()); + } + + #[test] + fn cached_after_first_access() { + let overlay = LazyOverlay::new(B256::ZERO, vec![]); + + // First access computes + let _ = overlay.get(); + assert!(overlay.is_computed()); + + // Second access uses cache + let _ = overlay.get(); + assert!(overlay.is_computed()); + } +} diff --git a/crates/chain-state/src/lib.rs b/crates/chain-state/src/lib.rs index 7ba3e2316c..f6abed9146 100644 --- a/crates/chain-state/src/lib.rs +++ b/crates/chain-state/src/lib.rs @@ -14,6 +14,9 @@ pub use in_memory::*; mod deferred_trie; pub use deferred_trie::*; +mod lazy_overlay; +pub use lazy_overlay::*; + mod noop; mod chain_info; diff --git a/crates/engine/tree/src/tree/metrics.rs b/crates/engine/tree/src/tree/metrics.rs index 5acf81104e..0e9685c091 100644 --- a/crates/engine/tree/src/tree/metrics.rs +++ b/crates/engine/tree/src/tree/metrics.rs @@ -334,10 +334,6 @@ pub(crate) struct BlockValidationMetrics { pub(crate) state_root_histogram: Histogram, /// Histogram of deferred trie computation duration. pub(crate) deferred_trie_compute_duration: Histogram, - /// Histogram of time spent waiting for deferred trie data to become available. - pub(crate) deferred_trie_wait_duration: Histogram, - /// Trie input computation duration - pub(crate) trie_input_duration: Histogram, /// Payload conversion and validation latency pub(crate) payload_validation_duration: Gauge, /// Histogram of payload validation latency diff --git a/crates/engine/tree/src/tree/payload_validator.rs b/crates/engine/tree/src/tree/payload_validator.rs index 2d2dbe1cb7..9160535df6 100644 --- a/crates/engine/tree/src/tree/payload_validator.rs +++ b/crates/engine/tree/src/tree/payload_validator.rs @@ -1,11 +1,5 @@ //! Types and traits for validating blocks and payloads. -/// Threshold for switching from `extend_ref` loop to `merge_batch` in `merge_overlay_trie_input`. -/// -/// Benchmarked crossover: `extend_ref` wins up to ~64 blocks, `merge_batch` wins beyond. -/// Using 64 as threshold since they're roughly equal there. -const MERGE_BATCH_THRESHOLD: usize = 64; - use crate::tree::{ cached_state::CachedStateProvider, error::{InsertBlockError, InsertBlockErrorKind, InsertPayloadError}, @@ -22,7 +16,7 @@ use alloy_eips::{eip1898::BlockWithParent, NumHash}; use alloy_evm::Evm; use alloy_primitives::B256; use rayon::prelude::*; -use reth_chain_state::{CanonicalInMemoryState, DeferredTrieData, ExecutedBlock}; +use reth_chain_state::{CanonicalInMemoryState, DeferredTrieData, ExecutedBlock, LazyOverlay}; use reth_consensus::{ConsensusError, FullConsensus}; use reth_engine_primitives::{ ConfigureEngineEvm, ExecutableTxIterator, ExecutionPayload, InvalidBlockHook, PayloadValidator, @@ -46,10 +40,7 @@ use reth_provider::{ StateProviderFactory, StateReader, }; use reth_revm::db::State; -use reth_trie::{ - updates::{TrieUpdates, TrieUpdatesSorted}, - HashedPostState, HashedPostStateSorted, StateRoot, TrieInputSorted, -}; +use reth_trie::{updates::TrieUpdates, HashedPostState, StateRoot}; use reth_trie_db::ChangesetCache; use reth_trie_parallel::root::{ParallelStateRoot, ParallelStateRootError}; use revm_primitives::Address; @@ -430,26 +421,16 @@ where .map_err(Box::::from)) .map(Arc::new); - // Compute trie input from ancestors once, before spawning payload processor. - // This will be extended with the current block's hashed state after execution. - let trie_input_start = Instant::now(); - let (trie_input, block_hash_for_overlay) = - ensure_ok!(self.compute_trie_input(parent_hash, ctx.state())); - - self.metrics - .block_validation - .trie_input_duration - .record(trie_input_start.elapsed().as_secs_f64()); + // Create lazy overlay from ancestors - this doesn't block, allowing execution to start + // before the trie data is ready. The overlay will be computed on first access. + let (lazy_overlay, anchor_hash) = Self::get_parent_lazy_overlay(parent_hash, ctx.state()); // Create overlay factory for payload processor (StateRootTask path needs it for // multiproofs) - let overlay_factory = { - let TrieInputSorted { nodes, state, .. } = &trie_input; + let overlay_factory = OverlayStateProviderFactory::new(self.provider.clone(), self.changeset_cache.clone()) - .with_block_hash(Some(block_hash_for_overlay)) - .with_trie_overlay(Some(Arc::clone(nodes))) - .with_hashed_state_overlay(Some(Arc::clone(state))) - }; + .with_block_hash(Some(anchor_hash)) + .with_lazy_overlay(lazy_overlay); // Spawn the appropriate processor based on strategy let mut handle = ensure_ok!(self.spawn_payload_processor( @@ -953,128 +934,36 @@ where self.invalid_block_hook.on_invalid_block(parent_header, block, output, trie_updates); } - /// Computes [`TrieInputSorted`] for the provided parent hash by combining database state - /// with in-memory overlays. + /// Creates a [`LazyOverlay`] for the parent block without blocking. /// - /// The goal of this function is to take in-memory blocks and generate a [`TrieInputSorted`] - /// that extends from the highest persisted ancestor up through the parent. This enables state - /// root computation and proof generation without requiring all blocks to be persisted - /// first. + /// Returns a lazy overlay that will compute the trie input on first access, and the anchor + /// block hash (the highest persisted ancestor). This allows execution to start immediately + /// while the trie input computation is deferred until the overlay is actually needed. /// - /// It works as follows: - /// 1. Collect in-memory overlay blocks using [`crate::tree::TreeState::blocks_by_hash`]. This - /// returns the highest persisted ancestor hash (`block_hash`) and the list of in-memory - /// blocks building on top of it. - /// 2. Fast path: If the tip in-memory block's trie input is already anchored to `block_hash` - /// (its `anchor_hash` matches `block_hash`), reuse it directly. - /// 3. Slow path: Build a new [`TrieInputSorted`] by aggregating the overlay blocks (from oldest - /// to newest) on top of the database state at `block_hash`. - #[instrument( - level = "debug", - target = "engine::tree::payload_validator", - skip_all, - fields(parent_hash) - )] - fn compute_trie_input( - &self, + /// If parent is on disk (no in-memory blocks), returns `None` for the lazy overlay. + fn get_parent_lazy_overlay( parent_hash: B256, state: &EngineApiTreeState, - ) -> ProviderResult<(TrieInputSorted, B256)> { - let wait_start = Instant::now(); - let (block_hash, blocks) = + ) -> (Option, B256) { + let (anchor_hash, blocks) = state.tree_state.blocks_by_hash(parent_hash).unwrap_or_else(|| (parent_hash, vec![])); - // Fast path: if the tip block's anchor matches the persisted ancestor hash, reuse its - // TrieInput. This means the TrieInputSorted already aggregates all in-memory overlays - // from that ancestor, so we can avoid re-aggregation. - if let Some(tip_block) = blocks.first() { - let data = tip_block.trie_data(); - if let (Some(anchor_hash), Some(trie_input)) = - (data.anchor_hash(), data.trie_input().cloned()) && - anchor_hash == block_hash - { - trace!(target: "engine::tree::payload_validator", %block_hash,"Reusing trie input with matching anchor hash"); - self.metrics - .block_validation - .deferred_trie_wait_duration - .record(wait_start.elapsed().as_secs_f64()); - return Ok(((*trie_input).clone(), block_hash)); - } - } - if blocks.is_empty() { - debug!(target: "engine::tree::payload_validator", "Parent found on disk"); - } else { - debug!(target: "engine::tree::payload_validator", historical = ?block_hash, blocks = blocks.len(), "Parent found in memory"); + debug!(target: "engine::tree::payload_validator", "Parent found on disk, no lazy overlay needed"); + return (None, anchor_hash); } - // Extend with contents of parent in-memory blocks directly in sorted form. - let input = Self::merge_overlay_trie_input(&blocks); + debug!( + target: "engine::tree::payload_validator", + %anchor_hash, + num_blocks = blocks.len(), + "Creating lazy overlay for in-memory blocks" + ); - self.metrics - .block_validation - .deferred_trie_wait_duration - .record(wait_start.elapsed().as_secs_f64()); - Ok((input, block_hash)) - } + // Extract deferred trie data handles (non-blocking) + let handles: Vec = blocks.iter().map(|b| b.trie_data_handle()).collect(); - /// Aggregates in-memory blocks into a single [`TrieInputSorted`] by combining their - /// state changes. - /// - /// The input `blocks` vector is ordered newest -> oldest (see `TreeState::blocks_by_hash`). - /// - /// Uses `extend_ref` loop for small k, k-way `merge_batch` for large k. - /// See [`MERGE_BATCH_THRESHOLD`] for crossover point. - fn merge_overlay_trie_input(blocks: &[ExecutedBlock]) -> TrieInputSorted { - if blocks.is_empty() { - return TrieInputSorted::default(); - } - - // Single block: return Arc directly without cloning - if blocks.len() == 1 { - let data = blocks[0].trie_data(); - return TrieInputSorted { - state: Arc::clone(&data.hashed_state), - nodes: Arc::clone(&data.trie_updates), - prefix_sets: Default::default(), - }; - } - - if blocks.len() < MERGE_BATCH_THRESHOLD { - // Small k: extend_ref loop is faster - // Iterate oldest->newest so newer values override older ones - let mut blocks_iter = blocks.iter().rev(); - let first = blocks_iter.next().expect("blocks is non-empty"); - let data = first.trie_data(); - - let mut state = Arc::clone(&data.hashed_state); - let mut nodes = Arc::clone(&data.trie_updates); - let state_mut = Arc::make_mut(&mut state); - let nodes_mut = Arc::make_mut(&mut nodes); - - for block in blocks_iter { - let data = block.trie_data(); - state_mut.extend_ref(data.hashed_state.as_ref()); - nodes_mut.extend_ref(data.trie_updates.as_ref()); - } - - TrieInputSorted { state, nodes, prefix_sets: Default::default() } - } else { - // Large k: merge_batch is faster (O(n log k) via k-way merge) - let trie_data: Vec<_> = blocks.iter().map(|b| b.trie_data()).collect(); - - let merged_state = HashedPostStateSorted::merge_batch( - trie_data.iter().map(|d| d.hashed_state.as_ref()), - ); - let merged_nodes = - TrieUpdatesSorted::merge_batch(trie_data.iter().map(|d| d.trie_updates.as_ref())); - - TrieInputSorted { - state: Arc::new(merged_state), - nodes: Arc::new(merged_nodes), - prefix_sets: Default::default(), - } - } + (Some(LazyOverlay::new(anchor_hash, handles)), anchor_hash) } /// Spawns a background task to compute and sort trie data for the executed block. diff --git a/crates/storage/provider/src/providers/state/overlay.rs b/crates/storage/provider/src/providers/state/overlay.rs index 5c7877f7b1..23e972938c 100644 --- a/crates/storage/provider/src/providers/state/overlay.rs +++ b/crates/storage/provider/src/providers/state/overlay.rs @@ -1,6 +1,7 @@ use alloy_primitives::{BlockNumber, B256}; use metrics::{Counter, Histogram}; use parking_lot::RwLock; +use reth_chain_state::LazyOverlay; use reth_db_api::DatabaseError; use reth_errors::{ProviderError, ProviderResult}; use reth_metrics::Metrics; @@ -53,6 +54,35 @@ struct Overlay { hashed_post_state: Arc, } +/// Source of overlay data for [`OverlayStateProviderFactory`]. +/// +/// Either provides immediate pre-computed overlay data, or a lazy overlay that computes +/// on first access. +#[derive(Debug, Clone)] +pub enum OverlaySource { + /// Immediate overlay with already-computed data. + Immediate { + /// Trie updates overlay. + trie: Arc, + /// Hashed state overlay. + state: Arc, + }, + /// Lazy overlay computed on first access. + Lazy(LazyOverlay), +} + +impl OverlaySource { + /// Resolve the overlay source into (trie, state) tuple. + /// + /// For lazy overlays, this may block waiting for deferred data. + fn resolve(&self) -> (Arc, Arc) { + match self { + Self::Immediate { trie, state } => (Arc::clone(trie), Arc::clone(state)), + Self::Lazy(lazy) => lazy.as_overlay(), + } + } +} + /// Factory for creating overlay state providers with optional reverts and overlays. /// /// This factory allows building an `OverlayStateProvider` whose DB state has been reverted to a @@ -63,10 +93,8 @@ pub struct OverlayStateProviderFactory { factory: F, /// Optional block hash for collecting reverts block_hash: Option, - /// Optional trie overlay - trie_overlay: Option>, - /// Optional hashed state overlay - hashed_state_overlay: Option>, + /// Optional overlay source (lazy or immediate). + overlay_source: Option, /// Changeset cache handle for retrieving trie changesets changeset_cache: ChangesetCache, /// Metrics for tracking provider operations @@ -82,8 +110,7 @@ impl OverlayStateProviderFactory { Self { factory, block_hash: None, - trie_overlay: None, - hashed_state_overlay: None, + overlay_source: None, changeset_cache, metrics: OverlayStateProviderMetrics::default(), overlay_cache: Default::default(), @@ -97,31 +124,59 @@ impl OverlayStateProviderFactory { self } - /// Set the trie overlay. + /// Set the overlay source (lazy or immediate). /// /// This overlay will be applied on top of any reverts applied via `with_block_hash`. - pub fn with_trie_overlay(mut self, trie_overlay: Option>) -> Self { - self.trie_overlay = trie_overlay; + pub fn with_overlay_source(mut self, source: Option) -> Self { + self.overlay_source = source; self } - /// Set the hashed state overlay + /// Set a lazy overlay that will be computed on first access. + /// + /// Convenience method that wraps the lazy overlay in `OverlaySource::Lazy`. + pub fn with_lazy_overlay(mut self, lazy_overlay: Option) -> Self { + self.overlay_source = lazy_overlay.map(OverlaySource::Lazy); + self + } + + /// Set the hashed state overlay. /// /// This overlay will be applied on top of any reverts applied via `with_block_hash`. pub fn with_hashed_state_overlay( mut self, hashed_state_overlay: Option>, ) -> Self { - self.hashed_state_overlay = hashed_state_overlay; + if let Some(state) = hashed_state_overlay { + self.overlay_source = Some(OverlaySource::Immediate { + trie: Arc::new(TrieUpdatesSorted::default()), + state, + }); + } self } /// Extends the existing hashed state overlay with the given [`HashedPostStateSorted`]. + /// + /// If no overlay exists, creates a new immediate overlay with the given state. + /// If a lazy overlay exists, it is resolved first then extended. pub fn with_extended_hashed_state_overlay(mut self, other: HashedPostStateSorted) -> Self { - if let Some(overlay) = self.hashed_state_overlay.as_mut() { - Arc::make_mut(overlay).extend_ref(&other); - } else { - self.hashed_state_overlay = Some(Arc::new(other)) + match &mut self.overlay_source { + Some(OverlaySource::Immediate { state, .. }) => { + Arc::make_mut(state).extend_ref(&other); + } + Some(OverlaySource::Lazy(lazy)) => { + // Resolve lazy overlay and convert to immediate with extension + let (trie, mut state) = lazy.as_overlay(); + Arc::make_mut(&mut state).extend_ref(&other); + self.overlay_source = Some(OverlaySource::Immediate { trie, state }); + } + None => { + self.overlay_source = Some(OverlaySource::Immediate { + trie: Arc::new(TrieUpdatesSorted::default()), + state: Arc::new(other), + }); + } } self } @@ -136,6 +191,19 @@ where + DBProvider + BlockNumReader, { + /// Resolves the effective overlay (trie updates, hashed state). + /// + /// If an overlay source is set, it is resolved (blocking if lazy). + /// Otherwise, returns empty defaults. + fn resolve_overlays(&self) -> (Arc, Arc) { + match &self.overlay_source { + Some(source) => source.resolve(), + None => { + (Arc::new(TrieUpdatesSorted::default()), Arc::new(HashedPostStateSorted::default())) + } + } + } + /// Returns the block number for [`Self`]'s `block_hash` field, if any. fn get_requested_block_number( &self, @@ -267,26 +335,26 @@ where res }; - // Extend with overlays if provided. If the reverts are empty we should just use the - // overlays directly, because `extend_ref` will actually clone the overlay. - let trie_updates = match self.trie_overlay.as_ref() { - Some(trie_overlay) if trie_reverts.is_empty() => Arc::clone(trie_overlay), - Some(trie_overlay) => { - trie_reverts.extend_ref(trie_overlay); - Arc::new(trie_reverts) - } - None => Arc::new(trie_reverts), + // Resolve overlays (lazy or immediate) and extend reverts with them. + // If reverts are empty, use overlays directly to avoid cloning. + let (overlay_trie, overlay_state) = self.resolve_overlays(); + + let trie_updates = if trie_reverts.is_empty() { + overlay_trie + } else if !overlay_trie.is_empty() { + trie_reverts.extend_ref(&overlay_trie); + Arc::new(trie_reverts) + } else { + Arc::new(trie_reverts) }; - let hashed_state_updates = match self.hashed_state_overlay.as_ref() { - Some(hashed_state_overlay) if hashed_state_reverts.is_empty() => { - Arc::clone(hashed_state_overlay) - } - Some(hashed_state_overlay) => { - hashed_state_reverts.extend_ref(hashed_state_overlay); - Arc::new(hashed_state_reverts) - } - None => Arc::new(hashed_state_reverts), + let hashed_state_updates = if hashed_state_reverts.is_empty() { + overlay_state + } else if !overlay_state.is_empty() { + hashed_state_reverts.extend_ref(&overlay_state); + Arc::new(hashed_state_reverts) + } else { + Arc::new(hashed_state_reverts) }; trie_updates_total_len = trie_updates.total_len(); @@ -303,13 +371,8 @@ where (trie_updates, hashed_state_updates) } else { - // If no block_hash, use overlays directly or defaults - let trie_updates = - self.trie_overlay.clone().unwrap_or_else(|| Arc::new(TrieUpdatesSorted::default())); - let hashed_state = self - .hashed_state_overlay - .clone() - .unwrap_or_else(|| Arc::new(HashedPostStateSorted::default())); + // If no block_hash, use overlays directly (resolving lazy if set) + let (trie_updates, hashed_state) = self.resolve_overlays(); retrieve_trie_reverts_duration = Duration::ZERO; retrieve_hashed_state_reverts_duration = Duration::ZERO; @@ -337,14 +400,9 @@ where #[instrument(level = "debug", target = "providers::state::overlay", skip_all)] fn get_overlay(&self, provider: &F::Provider) -> ProviderResult { // If we have no anchor block configured then we will never need to get trie reverts, just - // return the in-memory overlay. + // return the in-memory overlay (resolving lazy overlay if set). if self.block_hash.is_none() { - let trie_updates = - self.trie_overlay.clone().unwrap_or_else(|| Arc::new(TrieUpdatesSorted::default())); - let hashed_post_state = self - .hashed_state_overlay - .clone() - .unwrap_or_else(|| Arc::new(HashedPostStateSorted::default())); + let (trie_updates, hashed_post_state) = self.resolve_overlays(); return Ok(Overlay { trie_updates, hashed_post_state }) }