feat(metrics): jemalloc heap dump endpoint (#20811)

This commit is contained in:
Alexey Shekhirin
2026-01-07 19:36:08 +00:00
committed by GitHub
parent d756e8310a
commit 6f0ef914b9
13 changed files with 206 additions and 15 deletions

78
Cargo.lock generated
View File

@@ -5252,6 +5252,23 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "jemalloc_pprof"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74ff642505c7ce8d31c0d43ec0e235c6fd4585d9b8172d8f9dd04d36590200b5"
dependencies = [
"anyhow",
"libc",
"mappings",
"once_cell",
"pprof_util",
"tempfile",
"tikv-jemalloc-ctl",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "jni" name = "jni"
version = "0.21.1" version = "0.21.1"
@@ -5782,6 +5799,19 @@ dependencies = [
"syn 2.0.113", "syn 2.0.113",
] ]
[[package]]
name = "mappings"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db4d277bb50d4508057e7bddd7fcd19ef4a4cc38051b6a5a36868d75ae2cbeb9"
dependencies = [
"anyhow",
"libc",
"once_cell",
"pprof_util",
"tracing",
]
[[package]] [[package]]
name = "match-lookup" name = "match-lookup"
version = "0.1.1" version = "0.1.1"
@@ -6522,7 +6552,7 @@ dependencies = [
"opentelemetry-http", "opentelemetry-http",
"opentelemetry-proto", "opentelemetry-proto",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost", "prost 0.14.1",
"reqwest", "reqwest",
"thiserror 2.0.17", "thiserror 2.0.17",
"tokio", "tokio",
@@ -6538,7 +6568,7 @@ checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
dependencies = [ dependencies = [
"opentelemetry", "opentelemetry",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost", "prost 0.14.1",
"tonic", "tonic",
"tonic-prost", "tonic-prost",
] ]
@@ -6879,6 +6909,20 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "pprof_util"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4429d44e5e2c8a69399fc0070379201eed018e3df61e04eb7432811df073c224"
dependencies = [
"anyhow",
"backtrace",
"flate2",
"num",
"paste",
"prost 0.13.5",
]
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.21" version = "0.2.21"
@@ -7065,6 +7109,16 @@ dependencies = [
"syn 2.0.113", "syn 2.0.113",
] ]
[[package]]
name = "prost"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
dependencies = [
"bytes",
"prost-derive 0.13.5",
]
[[package]] [[package]]
name = "prost" name = "prost"
version = "0.14.1" version = "0.14.1"
@@ -7072,7 +7126,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d"
dependencies = [ dependencies = [
"bytes", "bytes",
"prost-derive", "prost-derive 0.14.1",
]
[[package]]
name = "prost-derive"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
dependencies = [
"anyhow",
"itertools 0.14.0",
"proc-macro2",
"quote",
"syn 2.0.113",
] ]
[[package]] [[package]]
@@ -9478,8 +9545,11 @@ dependencies = [
name = "reth-node-metrics" name = "reth-node-metrics"
version = "1.9.3" version = "1.9.3"
dependencies = [ dependencies = [
"bytes",
"eyre", "eyre",
"http", "http",
"http-body-util",
"jemalloc_pprof",
"jsonrpsee-server", "jsonrpsee-server",
"metrics", "metrics",
"metrics-exporter-prometheus", "metrics-exporter-prometheus",
@@ -13025,7 +13095,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
dependencies = [ dependencies = [
"bytes", "bytes",
"prost", "prost 0.14.1",
"tonic", "tonic",
] ]

View File

@@ -684,6 +684,7 @@ ethereum_ssz = "0.9.0"
ethereum_ssz_derive = "0.9.0" ethereum_ssz_derive = "0.9.0"
# allocators # allocators
jemalloc_pprof = { version = "0.8", default-features = false }
tikv-jemalloc-ctl = "0.6" tikv-jemalloc-ctl = "0.6"
tikv-jemallocator = "0.6" tikv-jemallocator = "0.6"
tracy-client = "0.18.0" tracy-client = "0.18.0"

View File

@@ -116,6 +116,11 @@ jemalloc-prof = [
"reth-cli-util/jemalloc", "reth-cli-util/jemalloc",
"reth-cli-util/jemalloc-prof", "reth-cli-util/jemalloc-prof",
"reth-ethereum-cli/jemalloc-prof", "reth-ethereum-cli/jemalloc-prof",
"reth-node-metrics/jemalloc-prof",
]
jemalloc-symbols = [
"jemalloc-prof",
"reth-ethereum-cli/jemalloc-symbols",
] ]
jemalloc-unprefixed = [ jemalloc-unprefixed = [
"reth-cli-util/jemalloc-unprefixed", "reth-cli-util/jemalloc-unprefixed",

View File

@@ -3,6 +3,10 @@
#[global_allocator] #[global_allocator]
static ALLOC: reth_cli_util::allocator::Allocator = reth_cli_util::allocator::new_allocator(); static ALLOC: reth_cli_util::allocator::Allocator = reth_cli_util::allocator::new_allocator();
// Built-in jemalloc option string, compiled only with the `jemalloc-prof` feature on unix and
// exported under the `_rjem_malloc_conf` symbol. It sets `prof:true`, `prof_active:true` and
// `lg_prof_sample:19`; the trailing `\0` NUL-terminates the byte string.
// NOTE(review): presumably jemalloc reads this symbol at startup as its default configuration,
// which is what makes the `/debug/pprof/heap` endpoint usable without env vars — confirm.
#[cfg(all(feature = "jemalloc-prof", unix))]
#[unsafe(export_name = "_rjem_malloc_conf")]
static MALLOC_CONF: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0";
use clap::Parser; use clap::Parser;
use reth::{args::RessArgs, cli::Cli, ress::install_ress_subprotocol}; use reth::{args::RessArgs, cli::Cli, ress::install_ress_subprotocol};
use reth_ethereum_cli::chainspec::EthereumChainSpecParser; use reth_ethereum_cli::chainspec::EthereumChainSpecParser;

View File

@@ -51,7 +51,12 @@ jemalloc = [
"reth-node-metrics/jemalloc", "reth-node-metrics/jemalloc",
] ]
jemalloc-prof = [ jemalloc-prof = [
"reth-node-core/jemalloc", "jemalloc",
"reth-node-metrics/jemalloc-prof",
]
jemalloc-symbols = [
"jemalloc-prof",
"reth-node-metrics/jemalloc-symbols",
] ]
tracy-allocator = [] tracy-allocator = []

View File

@@ -152,6 +152,15 @@ jemalloc = [
"reth-ethereum-cli?/jemalloc", "reth-ethereum-cli?/jemalloc",
"reth-node-core?/jemalloc", "reth-node-core?/jemalloc",
] ]
jemalloc-prof = [
"jemalloc",
"reth-cli-util?/jemalloc-prof",
"reth-ethereum-cli?/jemalloc-prof",
]
jemalloc-symbols = [
"jemalloc-prof",
"reth-ethereum-cli?/jemalloc-symbols",
]
js-tracer = [ js-tracer = [
"rpc", "rpc",
"reth-rpc/js-tracer", "reth-rpc/js-tracer",

View File

@@ -664,7 +664,8 @@ where
} }
}) })
.build(), .build(),
).with_push_gateway(self.node_config().metrics.push_gateway_url.clone(), self.node_config().metrics.push_gateway_interval); )
.with_push_gateway(self.node_config().metrics.push_gateway_url.clone(), self.node_config().metrics.push_gateway_interval);
MetricServer::new(config).serve().await?; MetricServer::new(config).serve().await?;
} }

View File

@@ -20,6 +20,8 @@ tokio.workspace = true
jsonrpsee-server.workspace = true jsonrpsee-server.workspace = true
http.workspace = true http.workspace = true
http-body-util.workspace = true
bytes.workspace = true
tower.workspace = true tower.workspace = true
reqwest.workspace = true reqwest.workspace = true
@@ -28,6 +30,7 @@ eyre.workspace = true
[target.'cfg(unix)'.dependencies] [target.'cfg(unix)'.dependencies]
tikv-jemalloc-ctl = { workspace = true, optional = true, features = ["stats"] } tikv-jemalloc-ctl = { workspace = true, optional = true, features = ["stats"] }
jemalloc_pprof = { workspace = true, optional = true }
[target.'cfg(target_os = "linux")'.dependencies] [target.'cfg(target_os = "linux")'.dependencies]
procfs = "0.17.0" procfs = "0.17.0"
@@ -41,3 +44,5 @@ workspace = true
[features] [features]
jemalloc = ["dep:tikv-jemalloc-ctl"] jemalloc = ["dep:tikv-jemalloc-ctl"]
jemalloc-prof = ["jemalloc", "dep:jemalloc_pprof"]
jemalloc-symbols = ["jemalloc-prof", "jemalloc_pprof?/symbolize"]

View File

@@ -4,8 +4,10 @@ use crate::{
recorder::install_prometheus_recorder, recorder::install_prometheus_recorder,
version::VersionInfo, version::VersionInfo,
}; };
use bytes::Bytes;
use eyre::WrapErr; use eyre::WrapErr;
use http::{header::CONTENT_TYPE, HeaderValue, Response}; use http::{header::CONTENT_TYPE, HeaderValue, Request, Response, StatusCode};
use http_body_util::Full;
use metrics::describe_gauge; use metrics::describe_gauge;
use metrics_process::Collector; use metrics_process::Collector;
use reqwest::Client; use reqwest::Client;
@@ -139,13 +141,8 @@ impl MetricServer {
let handle = install_prometheus_recorder(); let handle = install_prometheus_recorder();
let hook = hook.clone(); let hook = hook.clone();
let service = tower::service_fn(move |_| { let service = tower::service_fn(move |req: Request<_>| {
(hook)(); let response = handle_request(req.uri().path(), &*hook, handle);
let metrics = handle.handle().render();
let mut response = Response::new(metrics);
response
.headers_mut()
.insert(CONTENT_TYPE, HeaderValue::from_static("text/plain"));
async move { Ok::<_, Infallible>(response) } async move { Ok::<_, Infallible>(response) }
}); });
@@ -287,6 +284,76 @@ fn describe_io_stats() {
#[cfg(not(target_os = "linux"))] #[cfg(not(target_os = "linux"))]
const fn describe_io_stats() {} const fn describe_io_stats() {}
/// Routes a metrics-server request based on its URI path.
///
/// `/debug/pprof/heap` is answered by [`handle_pprof_heap`]. Every other path first runs `hook`
/// and then responds with the rendered output of the recorder handle as `text/plain`.
fn handle_request(
    path: &str,
    hook: impl Fn(),
    handle: &crate::recorder::PrometheusRecorder,
) -> Response<Full<Bytes>> {
    // The heap profile route is the only special case; bail out before touching the hook.
    if path == "/debug/pprof/heap" {
        return handle_pprof_heap();
    }

    // Run the hook before rendering so the rendered output is produced after it has executed.
    hook();
    let body = Bytes::from(handle.handle().render());

    let mut response = Response::new(Full::new(body));
    response.headers_mut().insert(CONTENT_TYPE, HeaderValue::from_static("text/plain"));
    response
}
/// Produces the response for `/debug/pprof/heap` when jemalloc profiling support is compiled in.
///
/// Outcomes:
/// - `jemalloc_pprof::PROF_CTL` is `None`: `500 Internal Server Error` with a hint on enabling
///   profiling.
/// - The profiling controller lock cannot be taken immediately: `503 Service Unavailable`.
/// - `dump_pprof` fails: `500 Internal Server Error` carrying the error text.
/// - Otherwise: the dump as the body, sent as `application/octet-stream` with
///   `Content-Encoding: gzip`.
#[cfg(all(feature = "jemalloc-prof", unix))]
fn handle_pprof_heap() -> Response<Full<Bytes>> {
    use http::header::CONTENT_ENCODING;

    // Builds a body-only response carrying the given non-success status.
    let failure = |status: StatusCode, body: Bytes| {
        let mut response = Response::new(Full::new(body));
        *response.status_mut() = status;
        response
    };

    let Some(prof_ctl) = jemalloc_pprof::PROF_CTL.as_ref() else {
        return failure(
            StatusCode::INTERNAL_SERVER_ERROR,
            Bytes::from_static(
                b"jemalloc profiling not enabled. \
                Set MALLOC_CONF=prof:true or rebuild with jemalloc-prof feature.",
            ),
        );
    };

    // `try_lock` never waits: if the lock is unavailable the caller is told to retry later.
    let Ok(mut ctl) = prof_ctl.try_lock() else {
        return failure(
            StatusCode::SERVICE_UNAVAILABLE,
            Bytes::from_static(b"Profile dump already in progress. Try again later."),
        );
    };

    let dumped = ctl.dump_pprof();
    match dumped {
        Ok(pprof) => {
            let mut response = Response::new(Full::new(Bytes::from(pprof)));
            let headers = response.headers_mut();
            headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/octet-stream"));
            headers.insert(CONTENT_ENCODING, HeaderValue::from_static("gzip"));
            response
        }
        Err(err) => failure(
            StatusCode::INTERNAL_SERVER_ERROR,
            Bytes::from(format!("Failed to dump pprof: {err}")),
        ),
    }
}
/// Fallback for `/debug/pprof/heap` when the `jemalloc-prof` feature is off or the target is not
/// unix: always answers `501 Not Implemented` with a hint to rebuild.
#[cfg(not(all(feature = "jemalloc-prof", unix)))]
fn handle_pprof_heap() -> Response<Full<Bytes>> {
    const MESSAGE: &[u8] =
        b"jemalloc pprof support not compiled. Rebuild with the jemalloc-prof feature.";

    let body = Full::new(Bytes::from_static(MESSAGE));
    let mut response = Response::new(body);
    *response.status_mut() = StatusCode::NOT_IMPLEMENTED;
    response
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

View File

@@ -36,7 +36,8 @@ js-tracer = [
] ]
jemalloc = ["reth-cli-util/jemalloc", "reth-optimism-cli/jemalloc"] jemalloc = ["reth-cli-util/jemalloc", "reth-optimism-cli/jemalloc"]
jemalloc-prof = ["reth-cli-util/jemalloc-prof"] jemalloc-prof = ["jemalloc", "reth-cli-util/jemalloc-prof", "reth-optimism-cli/jemalloc-prof"]
jemalloc-symbols = ["jemalloc-prof", "reth-optimism-cli/jemalloc-symbols"]
tracy-allocator = ["reth-cli-util/tracy-allocator"] tracy-allocator = ["reth-cli-util/tracy-allocator"]
asm-keccak = ["reth-optimism-cli/asm-keccak", "reth-optimism-node/asm-keccak"] asm-keccak = ["reth-optimism-cli/asm-keccak", "reth-optimism-node/asm-keccak"]

View File

@@ -8,6 +8,10 @@ use tracing::info;
#[global_allocator] #[global_allocator]
static ALLOC: reth_cli_util::allocator::Allocator = reth_cli_util::allocator::new_allocator(); static ALLOC: reth_cli_util::allocator::Allocator = reth_cli_util::allocator::new_allocator();
// Built-in jemalloc option string, compiled only with the `jemalloc-prof` feature on unix and
// exported under the `_rjem_malloc_conf` symbol. It sets `prof:true`, `prof_active:true` and
// `lg_prof_sample:19`; the trailing `\0` NUL-terminates the byte string.
// NOTE(review): presumably jemalloc reads this symbol at startup as its default configuration,
// which is what makes the `/debug/pprof/heap` endpoint usable without env vars — confirm.
#[cfg(all(feature = "jemalloc-prof", unix))]
#[unsafe(export_name = "_rjem_malloc_conf")]
static MALLOC_CONF: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:19\0";
fn main() { fn main() {
reth_cli_util::sigsegv_handler::install(); reth_cli_util::sigsegv_handler::install();

View File

@@ -90,6 +90,14 @@ jemalloc = [
"reth-node-core/jemalloc", "reth-node-core/jemalloc",
"reth-node-metrics/jemalloc", "reth-node-metrics/jemalloc",
] ]
jemalloc-prof = [
"jemalloc",
"reth-node-metrics/jemalloc-prof",
]
jemalloc-symbols = [
"jemalloc-prof",
"reth-node-metrics/jemalloc-symbols",
]
dev = [ dev = [
"dep:proptest", "dep:proptest",

View File

@@ -161,3 +161,14 @@ If everything is working, this will output `jeprof.*.heap` files while reth is r
[The jemalloc website](https://jemalloc.net/jemalloc.3.html#opt.abort) has a helpful overview of the options available, for example `lg_prof_interval`, `lg_prof_sample`, `prof_leak`, and `prof_final`. [The jemalloc website](https://jemalloc.net/jemalloc.3.html#opt.abort) has a helpful overview of the options available, for example `lg_prof_interval`, `lg_prof_sample`, `prof_leak`, and `prof_final`.
Now that we have the heap snapshots, we can analyze them using `jeprof`. An example of jeprof usage and output can be seen on the jemalloc github repository: https://github.com/jemalloc/jemalloc/wiki/Use-Case:-Leak-Checking Now that we have the heap snapshots, we can analyze them using `jeprof`. An example of jeprof usage and output can be seen on the jemalloc github repository: https://github.com/jemalloc/jemalloc/wiki/Use-Case:-Leak-Checking
### HTTP pprof endpoint
When built with the `jemalloc-prof` feature, reth exposes a heap profiling endpoint on the metrics server (default port 9001) at `/debug/pprof/heap`. This endpoint returns heap profiles in [pprof format](https://github.com/google/pprof), which is compatible with the standard `pprof` toolchain.
By default, the pprof output contains raw addresses that require external symbolization. You need either `addr2line` or `llvm-addr2line` in your PATH for `pprof` to resolve function names.
For pre-symbolized profiles (useful on macOS or when external tools are unavailable), build with the `jemalloc-symbols` feature:
```
cargo build --features jemalloc-prof,jemalloc-symbols --profile profiling
```