feat(rocksdb): add WAL size tracking metric and Grafana dashboard (#21295)

Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
joshieDo
2026-01-23 19:59:10 +00:00
committed by GitHub
parent ab418642b4
commit 3648483512
7 changed files with 139 additions and 6 deletions

View File

@@ -205,6 +205,16 @@ impl Command {
.add_cell(Cell::new(human_bytes(total_size as f64)))
.add_cell(Cell::new(human_bytes(total_pending as f64)));
table.add_row(row);
let wal_size = tool.provider_factory.rocksdb_provider().wal_size_bytes();
let mut row = Row::new();
row.add_cell(Cell::new("WAL"))
.add_cell(Cell::new(""))
.add_cell(Cell::new(""))
.add_cell(Cell::new(""))
.add_cell(Cell::new(human_bytes(wal_size as f64)))
.add_cell(Cell::new(""));
table.add_row(row);
}
table

View File

@@ -257,6 +257,11 @@ fn describe_rocksdb_metrics() {
Unit::Bytes,
"The size of memtables for a RocksDB table"
);
describe_gauge!(
"rocksdb.wal_size",
Unit::Bytes,
"The total size of WAL (Write-Ahead Log) files. Important: this is not included in table_size or sst_size metrics"
);
}
#[cfg(all(feature = "jemalloc", unix))]

View File

@@ -39,7 +39,8 @@ pub use consistent::ConsistentProvider;
pub(crate) mod rocksdb;
pub use rocksdb::{
RocksDBBatch, RocksDBBuilder, RocksDBProvider, RocksDBRawIter, RocksDBTableStats, RocksTx,
RocksDBBatch, RocksDBBuilder, RocksDBProvider, RocksDBRawIter, RocksDBStats, RocksDBTableStats,
RocksTx,
};
/// Helper trait to bound [`NodeTypes`] so that combined with database they satisfy

View File

@@ -6,5 +6,6 @@ mod provider;
pub(crate) use provider::{PendingRocksDBBatches, RocksDBWriteCtx};
pub use provider::{
RocksDBBatch, RocksDBBuilder, RocksDBProvider, RocksDBRawIter, RocksDBTableStats, RocksTx,
RocksDBBatch, RocksDBBuilder, RocksDBProvider, RocksDBRawIter, RocksDBStats, RocksDBTableStats,
RocksTx,
};

View File

@@ -57,6 +57,19 @@ pub struct RocksDBTableStats {
pub pending_compaction_bytes: u64,
}
/// Database-level statistics for `RocksDB`.
///
/// Bundles the per-table (column family) statistics together with metrics that only exist at
/// the database level, such as the WAL size.
#[derive(Debug, Clone)]
pub struct RocksDBStats {
/// Statistics for each table (column family).
pub tables: Vec<RocksDBTableStats>,
/// Total size of WAL (Write-Ahead Log) files in bytes.
///
/// The WAL is shared across all tables, so it is not included in any per-table metric.
pub wal_size_bytes: u64,
}
/// Context for `RocksDB` block writes.
#[derive(Clone)]
pub(crate) struct RocksDBWriteCtx {
@@ -457,6 +470,31 @@ impl RocksDBProviderInner {
}
}
/// Returns the filesystem path of the database directory.
///
/// Both the read-only and the read-write handle expose the directory they were opened on; this
/// simply forwards to whichever handle is active.
fn path(&self) -> &Path {
    match self {
        Self::ReadOnly { db, .. } => db.path(),
        Self::ReadWrite { db, .. } => db.path(),
    }
}
/// Returns the combined size, in bytes, of all WAL (Write-Ahead Log) files.
///
/// WAL files are recognised by their `.log` extension inside the `RocksDB` directory.
///
/// This is a best-effort measurement: if the directory cannot be listed the result is `0`, and
/// entries that cannot be read or whose metadata is unavailable are skipped.
fn wal_size_bytes(&self) -> u64 {
    // An unreadable directory is reported as "no WAL" rather than as an error.
    let Ok(dir) = std::fs::read_dir(self.path()) else {
        return 0;
    };

    let mut total = 0u64;
    // `flatten` drops entries that failed to be read.
    for entry in dir.flatten() {
        let is_wal = entry.path().extension().is_some_and(|ext| ext == "log");
        if !is_wal {
            continue;
        }
        // Entries whose metadata cannot be fetched contribute nothing.
        if let Ok(meta) = entry.metadata() {
            total += meta.len();
        }
    }
    total
}
/// Returns statistics for all column families in the database.
fn table_stats(&self) -> Vec<RocksDBTableStats> {
let mut stats = Vec::new();
@@ -515,6 +553,11 @@ impl RocksDBProviderInner {
stats
}
/// Collects database-level statistics: the per-table stats plus the WAL size.
fn db_stats(&self) -> RocksDBStats {
    // Gather the table stats first, then the WAL size, matching the field order.
    let tables = self.table_stats();
    let wal_size_bytes = self.wal_size_bytes();
    RocksDBStats { tables, wal_size_bytes }
}
}
impl fmt::Debug for RocksDBProviderInner {
@@ -595,6 +638,9 @@ impl DatabaseMetrics for RocksDBProvider {
));
}
// WAL size (DB-level, shared across all tables)
metrics.push(("rocksdb.wal_size", self.wal_size_bytes() as f64, vec![]));
metrics
}
}
@@ -838,6 +884,22 @@ impl RocksDBProvider {
self.0.table_stats()
}
/// Returns the total size of WAL (Write-Ahead Log) files in bytes.
///
/// This scans the `RocksDB` directory for `.log` files and sums their sizes, so every call
/// touches the filesystem. If the directory cannot be read, `0` is returned.
///
/// WAL files can be significant (e.g., 2.7GB observed) and are not included
/// in `table_size`, `sst_size`, or `memtable_size` metrics.
pub fn wal_size_bytes(&self) -> u64 {
self.0.wal_size_bytes()
}
/// Returns database-level statistics including per-table stats and WAL size.
///
/// This combines [`Self::table_stats`] and [`Self::wal_size_bytes`] into a single
/// [`RocksDBStats`] value.
pub fn db_stats(&self) -> RocksDBStats {
self.0.db_stats()
}
/// Flushes pending writes for the specified tables to disk.
///
/// This performs a flush of:

View File

@@ -32,6 +32,15 @@ pub struct RocksDBTableStats {
pub pending_compaction_bytes: u64,
}
/// Database-level statistics for `RocksDB` (stub).
///
/// Stand-in type used when the `RocksDB` feature is disabled.
#[derive(Debug, Clone)]
pub struct RocksDBStats {
/// Statistics for each table (column family).
pub tables: Vec<RocksDBTableStats>,
/// Total size of WAL (Write-Ahead Log) files in bytes.
pub wal_size_bytes: u64,
}
/// Context for `RocksDB` block writes (stub).
#[derive(Debug, Clone)]
#[allow(dead_code)]
@@ -89,6 +98,21 @@ impl RocksDBProvider {
Ok(())
}
/// Returns the total size of WAL (Write-Ahead Log) files in bytes (stub implementation).
///
/// Always returns `0` since there is no `RocksDB` when the feature is disabled.
pub const fn wal_size_bytes(&self) -> u64 {
0
}
/// Returns database-level statistics including per-table stats and WAL size (stub
/// implementation).
///
/// Returns empty stats (no tables and a WAL size of `0`) since there is no `RocksDB` when the
/// feature is disabled.
pub const fn db_stats(&self) -> RocksDBStats {
RocksDBStats { tables: Vec::new(), wal_size_bytes: 0 }
}
/// Flushes all pending writes to disk (stub implementation).
///
/// This is a no-op since there is no `RocksDB` when the feature is disabled.

View File

@@ -828,7 +828,7 @@
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum(reth_rocksdb_table_size{$instance_label=\"$instance\"}) or vector(0)",
"expr": "(sum(reth_rocksdb_table_size{$instance_label=\"$instance\"}) or vector(0)) + (sum(reth_rocksdb_wal_size{$instance_label=\"$instance\"}) or vector(0))",
"hide": false,
"instant": false,
"legendFormat": "RocksDB",
@@ -841,7 +841,7 @@
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum(reth_db_table_size{$instance_label=\"$instance\"}) + sum(reth_db_freelist{$instance_label=\"$instance\"} * reth_db_page_size{$instance_label=\"$instance\"}) + sum(reth_static_files_segment_size{$instance_label=\"$instance\"}) + (sum(reth_rocksdb_table_size{$instance_label=\"$instance\"}) or vector(0))",
"expr": "sum(reth_db_table_size{$instance_label=\"$instance\"}) + sum(reth_db_freelist{$instance_label=\"$instance\"} * reth_db_page_size{$instance_label=\"$instance\"}) + sum(reth_static_files_segment_size{$instance_label=\"$instance\"}) + (sum(reth_rocksdb_table_size{$instance_label=\"$instance\"}) or vector(0)) + (sum(reth_rocksdb_wal_size{$instance_label=\"$instance\"}) or vector(0))",
"hide": false,
"instant": false,
"legendFormat": "Total",
@@ -6771,6 +6771,17 @@
"legendFormat": "{{table}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "reth_rocksdb_wal_size{$instance_label=\"$instance\"}",
"legendFormat": "WAL",
"range": true,
"refId": "B"
}
],
"title": "RocksDB Tables Size",
@@ -7091,7 +7102,7 @@
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "sum by (job) ( reth_rocksdb_table_size{$instance_label=\"$instance\"} )",
"expr": "sum by (job) ( reth_rocksdb_table_size{$instance_label=\"$instance\"} ) + (sum by (job) ( reth_rocksdb_wal_size{$instance_label=\"$instance\"} ) or vector(0))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
@@ -12441,6 +12452,18 @@
"legendFormat": "__auto",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${datasource}"
},
"editorMode": "code",
"expr": "label_replace(reth_rocksdb_wal_size{$instance_label=\"$instance\"}, \"table\", \"WAL\", \"\", \"\")",
"format": "table",
"legendFormat": "__auto",
"range": true,
"refId": "C"
}
],
"transformations": [
@@ -12464,6 +12487,12 @@
],
"operation": "aggregate"
},
"Value #C": {
"aggregations": [
"lastNotNull"
],
"operation": "aggregate"
},
"table": {
"aggregations": [],
"operation": "groupby"
@@ -12489,7 +12518,8 @@
"renameByName": {
"table": "Table",
"Value #A (lastNotNull)": "SST Size",
"Value #B (lastNotNull)": "Memtable Size"
"Value #B (lastNotNull)": "Memtable Size",
"Value #C (lastNotNull)": "WAL Size"
}
}
}