From 5ef200eaad20c1930d5856a55ffb5ed5d9e37311 Mon Sep 17 00:00:00 2001
From: Georgios Konstantopoulos
Date: Tue, 20 Jan 2026 07:58:43 -0800
Subject: [PATCH] perf(db): stack-allocate ShardedKey and StorageShardedKey
 encoding (#21200)

Co-authored-by: Amp
---
 Cargo.lock                                    |   1 +
 crates/storage/db-api/Cargo.toml              |   5 +
 .../db-api/benches/sharded_key_encode.rs      | 142 ++++++++++++++++++
 .../storage/db-api/src/models/sharded_key.rs  |  70 +++++++--
 .../db-api/src/models/storage_sharded_key.rs  |  60 +++++++-
 5 files changed, 263 insertions(+), 15 deletions(-)
 create mode 100644 crates/storage/db-api/benches/sharded_key_encode.rs

diff --git a/Cargo.lock b/Cargo.lock
index e4fc4fc741..66fee9d2e2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8099,6 +8099,7 @@ dependencies = [
  "alloy-primitives",
  "arbitrary",
  "bytes",
+ "codspeed-criterion-compat",
  "derive_more",
  "metrics",
  "modular-bitfield",
diff --git a/crates/storage/db-api/Cargo.toml b/crates/storage/db-api/Cargo.toml
index 49e4c84f7a..e25595f1ac 100644
--- a/crates/storage/db-api/Cargo.toml
+++ b/crates/storage/db-api/Cargo.toml
@@ -60,6 +60,11 @@ test-fuzz.workspace = true
 arbitrary = { workspace = true, features = ["derive"] }
 proptest.workspace = true
 proptest-arbitrary-interop.workspace = true
+criterion.workspace = true
+
+[[bench]]
+name = "sharded_key_encode"
+harness = false
 
 [features]
 test-utils = [
diff --git a/crates/storage/db-api/benches/sharded_key_encode.rs b/crates/storage/db-api/benches/sharded_key_encode.rs
new file mode 100644
index 0000000000..5366e234e0
--- /dev/null
+++ b/crates/storage/db-api/benches/sharded_key_encode.rs
@@ -0,0 +1,142 @@
+//! Benchmarks for `ShardedKey` and `StorageShardedKey` encoding.
+//!
+//! These benchmarks measure the performance of stack-allocated vs heap-allocated key encoding,
+//! inspired by Anza Labs' PR #3603 which saved ~20k allocations/sec by moving `RocksDB` keys
+//! from heap to stack.
+//!
+//! Run with: `cargo bench -p reth-db-api --bench sharded_key_encode`
+
+#![allow(missing_docs)]
+
+use alloy_primitives::{Address, B256};
+use criterion::{black_box, criterion_group, criterion_main, BatchSize, Criterion, Throughput};
+use reth_db_api::{
+    models::{storage_sharded_key::StorageShardedKey, ShardedKey},
+    table::Encode,
+};
+
+/// Number of keys to encode per iteration for throughput measurement.
+const BATCH_SIZE: usize = 10_000;
+
+fn bench_sharded_key_address_encode(c: &mut Criterion) {
+    let mut group = c.benchmark_group("sharded_key_encode");
+    group.throughput(Throughput::Elements(BATCH_SIZE as u64));
+
+    // Pre-generate test data
+    let keys: Vec<ShardedKey<Address>> = (0..BATCH_SIZE)
+        .map(|i| {
+            let mut addr_bytes = [0u8; 20];
+            addr_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            ShardedKey::new(Address::from(addr_bytes), i as u64)
+        })
+        .collect();
+
+    group.bench_function("ShardedKey<Address>::encode", |b| {
+        b.iter_batched(
+            || keys.clone(),
+            |keys| {
+                for key in keys {
+                    let encoded = black_box(key.encode());
+                    black_box(encoded.as_ref());
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_storage_sharded_key_encode(c: &mut Criterion) {
+    let mut group = c.benchmark_group("storage_sharded_key_encode");
+    group.throughput(Throughput::Elements(BATCH_SIZE as u64));
+
+    // Pre-generate test data
+    let keys: Vec<StorageShardedKey> = (0..BATCH_SIZE)
+        .map(|i| {
+            let mut addr_bytes = [0u8; 20];
+            addr_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            let mut key_bytes = [0u8; 32];
+            key_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            StorageShardedKey::new(Address::from(addr_bytes), B256::from(key_bytes), i as u64)
+        })
+        .collect();
+
+    group.bench_function("StorageShardedKey::encode", |b| {
+        b.iter_batched(
+            || keys.clone(),
+            |keys| {
+                for key in keys {
+                    let encoded = black_box(key.encode());
+                    black_box(encoded.as_ref());
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+fn bench_encode_decode_roundtrip(c: &mut Criterion) {
+    use reth_db_api::table::Decode;
+
+    let mut group = c.benchmark_group("sharded_key_roundtrip");
+    group.throughput(Throughput::Elements(BATCH_SIZE as u64));
+
+    let keys: Vec<ShardedKey<Address>> = (0..BATCH_SIZE)
+        .map(|i| {
+            let mut addr_bytes = [0u8; 20];
+            addr_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            ShardedKey::new(Address::from(addr_bytes), i as u64)
+        })
+        .collect();
+
+    group.bench_function("ShardedKey<Address>::encode_then_decode", |b| {
+        b.iter_batched(
+            || keys.clone(),
+            |keys| {
+                for key in keys {
+                    let encoded = key.encode();
+                    let decoded = black_box(ShardedKey::<Address>::decode(&encoded).unwrap());
+                    black_box(decoded);
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    let storage_keys: Vec<StorageShardedKey> = (0..BATCH_SIZE)
+        .map(|i| {
+            let mut addr_bytes = [0u8; 20];
+            addr_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            let mut key_bytes = [0u8; 32];
+            key_bytes[..8].copy_from_slice(&(i as u64).to_be_bytes());
+            StorageShardedKey::new(Address::from(addr_bytes), B256::from(key_bytes), i as u64)
+        })
+        .collect();
+
+    group.bench_function("StorageShardedKey::encode_then_decode", |b| {
+        b.iter_batched(
+            || storage_keys.clone(),
+            |keys| {
+                for key in keys {
+                    let encoded = key.encode();
+                    let decoded = black_box(StorageShardedKey::decode(&encoded).unwrap());
+                    black_box(decoded);
+                }
+            },
+            BatchSize::SmallInput,
+        )
+    });
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    bench_sharded_key_address_encode,
+    bench_storage_sharded_key_encode,
+    bench_encode_decode_roundtrip,
+);
+criterion_main!(benches);
diff --git a/crates/storage/db-api/src/models/sharded_key.rs b/crates/storage/db-api/src/models/sharded_key.rs
index fdd583f0f5..ae8b0cf5ae 100644
--- a/crates/storage/db-api/src/models/sharded_key.rs
+++ b/crates/storage/db-api/src/models/sharded_key.rs
@@ -3,13 +3,16 @@ use crate::{
     table::{Decode, Encode},
     DatabaseError,
 };
-use alloy_primitives::BlockNumber;
+use alloy_primitives::{Address, BlockNumber};
 use serde::{Deserialize, Serialize};
 use std::hash::Hash;
 
 /// Number of indices in one shard.
 pub const NUM_OF_INDICES_IN_SHARD: usize = 2_000;
 
+/// Size of `BlockNumber` in bytes (u64 = 8 bytes).
+const BLOCK_NUMBER_SIZE: usize = std::mem::size_of::<BlockNumber>();
+
 /// Sometimes data can be too big to be saved for a single key. This helps out by dividing the data
 /// into different shards. Example:
 ///
@@ -43,21 +46,68 @@ impl<T> ShardedKey<T> {
     }
 }
 
-impl<T: Encode> Encode for ShardedKey<T> {
-    type Encoded = Vec<u8>;
+/// Stack-allocated encoded key for `ShardedKey<Address>`.
+///
+/// This avoids heap allocation in hot database paths. The key layout is:
+/// - 20 bytes: `Address`
+/// - 8 bytes: `BlockNumber` (big-endian)
+pub type ShardedKeyAddressEncoded = [u8; 20 + BLOCK_NUMBER_SIZE];
 
+impl Encode for ShardedKey<Address> {
+    type Encoded = ShardedKeyAddressEncoded;
+
+    #[inline]
     fn encode(self) -> Self::Encoded {
-        let mut buf: Vec<u8> = Encode::encode(self.key).into();
-        buf.extend_from_slice(&self.highest_block_number.to_be_bytes());
+        let mut buf = [0u8; 20 + BLOCK_NUMBER_SIZE];
+        buf[..20].copy_from_slice(self.key.as_slice());
+        buf[20..].copy_from_slice(&self.highest_block_number.to_be_bytes());
         buf
     }
 }
 
-impl<T: Decode> Decode for ShardedKey<T> {
+impl Decode for ShardedKey<Address> {
     fn decode(value: &[u8]) -> Result<Self, DatabaseError> {
-        let (key, highest_tx_number) = value.split_last_chunk().ok_or(DatabaseError::Decode)?;
-        let key = T::decode(key)?;
-        let highest_tx_number = u64::from_be_bytes(*highest_tx_number);
-        Ok(Self::new(key, highest_tx_number))
+        if value.len() != 20 + BLOCK_NUMBER_SIZE {
+            return Err(DatabaseError::Decode);
+        }
+        let key = Address::from_slice(&value[..20]);
+        let highest_block_number =
+            u64::from_be_bytes(value[20..].try_into().map_err(|_| DatabaseError::Decode)?);
+        Ok(Self::new(key, highest_block_number))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloy_primitives::address;
+
+    #[test]
+    fn sharded_key_address_encode_decode_roundtrip() {
+        let addr = address!("0102030405060708091011121314151617181920");
+        let block_num = 0x123456789ABCDEF0u64;
+        let key = ShardedKey::new(addr, block_num);
+
+        let encoded = key.encode();
+
+        // Verify it's stack-allocated (28 bytes)
+        assert_eq!(encoded.len(), 28);
+        assert_eq!(std::mem::size_of_val(&encoded), 28);
+
+        // Verify roundtrip (check against expected values since key was consumed)
+        let decoded = ShardedKey::<Address>::decode(&encoded).unwrap();
+        assert_eq!(decoded.key, address!("0102030405060708091011121314151617181920"));
+        assert_eq!(decoded.highest_block_number, 0x123456789ABCDEF0u64);
+    }
+
+    #[test]
+    fn sharded_key_last_works() {
+        let addr = address!("0102030405060708091011121314151617181920");
+        let key = ShardedKey::<Address>::last(addr);
+        assert_eq!(key.highest_block_number, u64::MAX);
+
+        let encoded = key.encode();
+        let decoded = ShardedKey::<Address>::decode(&encoded).unwrap();
+        assert_eq!(decoded.highest_block_number, u64::MAX);
+    }
+}
diff --git a/crates/storage/db-api/src/models/storage_sharded_key.rs b/crates/storage/db-api/src/models/storage_sharded_key.rs
index 6c7e40e273..d9f724cdf5 100644
--- a/crates/storage/db-api/src/models/storage_sharded_key.rs
+++ b/crates/storage/db-api/src/models/storage_sharded_key.rs
@@ -16,6 +16,14 @@ pub const NUM_OF_INDICES_IN_SHARD: usize = 2_000;
 /// The fields are: 20-byte address, 32-byte key, and 8-byte block number
 const STORAGE_SHARD_KEY_BYTES_SIZE: usize = 20 + 32 + 8;
 
+/// Stack-allocated encoded key for `StorageShardedKey`.
+///
+/// This avoids heap allocation in hot database paths. The key layout is:
+/// - 20 bytes: `Address`
+/// - 32 bytes: `B256` storage key
+/// - 8 bytes: `BlockNumber` (big-endian)
+pub type StorageShardedKeyEncoded = [u8; STORAGE_SHARD_KEY_BYTES_SIZE];
+
 /// Sometimes data can be too big to be saved for a single key. This helps out by dividing the data
 /// into different shards. Example:
 ///
@@ -54,13 +62,14 @@ impl StorageShardedKey {
 }
 
 impl Encode for StorageShardedKey {
-    type Encoded = Vec<u8>;
+    type Encoded = StorageShardedKeyEncoded;
 
+    #[inline]
     fn encode(self) -> Self::Encoded {
-        let mut buf: Vec<u8> = Vec::with_capacity(STORAGE_SHARD_KEY_BYTES_SIZE);
-        buf.extend_from_slice(&Encode::encode(self.address));
-        buf.extend_from_slice(&Encode::encode(self.sharded_key.key));
-        buf.extend_from_slice(&self.sharded_key.highest_block_number.to_be_bytes());
+        let mut buf = [0u8; STORAGE_SHARD_KEY_BYTES_SIZE];
+        buf[..20].copy_from_slice(self.address.as_slice());
+        buf[20..52].copy_from_slice(self.sharded_key.key.as_slice());
+        buf[52..].copy_from_slice(&self.sharded_key.highest_block_number.to_be_bytes());
         buf
     }
 }
@@ -81,3 +90,44 @@ impl Decode for StorageShardedKey {
         Ok(Self { address, sharded_key: ShardedKey::new(storage_key, highest_block_number) })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloy_primitives::{address, b256};
+
+    #[test]
+    fn storage_sharded_key_encode_decode_roundtrip() {
+        let addr = address!("0102030405060708091011121314151617181920");
+        let storage_key = b256!("0001020304050607080910111213141516171819202122232425262728293031");
+        let block_num = 0x123456789ABCDEFu64;
+        let key = StorageShardedKey::new(addr, storage_key, block_num);
+
+        let encoded = key.encode();
+
+        // Verify it's stack-allocated (60 bytes)
+        assert_eq!(encoded.len(), 60);
+        assert_eq!(std::mem::size_of_val(&encoded), 60);
+
+        // Verify roundtrip (check against expected values since key was consumed)
+        let decoded = StorageShardedKey::decode(&encoded).unwrap();
+        assert_eq!(decoded.address, address!("0102030405060708091011121314151617181920"));
+        assert_eq!(
+            decoded.sharded_key.key,
+            b256!("0001020304050607080910111213141516171819202122232425262728293031")
+        );
+        assert_eq!(decoded.sharded_key.highest_block_number, 0x123456789ABCDEFu64);
+    }
+
+    #[test]
+    fn storage_sharded_key_last_works() {
+        let addr = address!("0102030405060708091011121314151617181920");
+        let storage_key = b256!("0001020304050607080910111213141516171819202122232425262728293031");
+        let key = StorageShardedKey::last(addr, storage_key);
+        assert_eq!(key.sharded_key.highest_block_number, u64::MAX);
+
+        let encoded = key.encode();
+        let decoded = StorageShardedKey::decode(&encoded).unwrap();
+        assert_eq!(decoded.sharded_key.highest_block_number, u64::MAX);
+    }
+}
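
Note (illustrative, not part of the diff above): the heart of the change is
swapping the heap-allocated `Vec<u8>` `Encoded` type for a fixed-size array
that lives on the stack. A minimal, self-contained sketch of the same
20 + 8 byte layout follows; `encode_sharded_key` and the `main` driver are
hypothetical names used only for illustration, while the layout and the
big-endian choice mirror the patch:

    // Hypothetical sketch of the ShardedKey<Address> key layout: a 20-byte
    // address followed by an 8-byte big-endian block number. Big-endian
    // matters because it makes lexicographic byte order match numeric block
    // order, which ordered key-value stores such as MDBX rely on for range scans.
    fn encode_sharded_key(address: &[u8; 20], highest_block_number: u64) -> [u8; 28] {
        let mut buf = [0u8; 28]; // fixed-size array on the stack: no allocator call
        buf[..20].copy_from_slice(address);
        buf[20..].copy_from_slice(&highest_block_number.to_be_bytes());
        buf
    }

    fn main() {
        let key = encode_sharded_key(&[0xAA; 20], 42);
        assert_eq!(key[..20], [0xAA; 20]);
        assert_eq!(u64::from_be_bytes(key[20..].try_into().unwrap()), 42);
    }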