perf(chain-state): hoist Arc::make_mut outside loop in merge_ancestors_into_overlay

Move Arc::make_mut calls outside the ancestor iteration loop to reduce
atomic refcount check overhead from O(N) to O(1) per field.

Benchmark results show a 2-6.5x speedup for the two-field case, growing with the number of iterations:
- 10 iterations: 2.0x faster
- 100 iterations: 4.8x faster
- 1000 iterations: 6.5x faster

See: https://gist.github.com/yongkangc/983d0b346499a5120b22ee063b48cb0f
This commit is contained in:
yongkangc
2026-01-08 07:32:59 +00:00
parent 8a2eb3031c
commit 91c64563df

View File

@@ -230,19 +230,24 @@ impl DeferredTrieData {
///
/// Iterates ancestors oldest -> newest so later state takes precedence.
fn merge_ancestors_into_overlay(ancestors: &[Self]) -> TrieInputSorted {
    // Nothing to merge: hand back an empty overlay without touching the Arcs.
    if ancestors.is_empty() {
        return TrieInputSorted::default();
    }

    let mut overlay = TrieInputSorted::default();

    // Obtain each mutable reference exactly once, before the loop, instead of
    // calling `Arc::make_mut` for every ancestor. This reduces the refcount
    // checks from O(N) to O(1) per field. The two borrows target different
    // fields of `overlay`, so both can be held at the same time.
    let state_mut = Arc::make_mut(&mut overlay.state);
    let nodes_mut = Arc::make_mut(&mut overlay.nodes);

    // Ancestors are processed oldest -> newest so later state takes precedence.
    for ancestor in ancestors {
        let ancestor_data = ancestor.wait_cloned();
        state_mut.extend_ref(ancestor_data.hashed_state.as_ref());
        nodes_mut.extend_ref(ancestor_data.trie_updates.as_ref());
    }

    // The mutable borrows end with the loop, so `overlay` can be moved out.
    overlay
}