From a5d8fa3ae1296867caf78dc060eb27902b651fca Mon Sep 17 00:00:00 2001 From: Derek Cofausper <256792747+decofe@users.noreply.github.com> Date: Tue, 3 Mar 2026 06:44:43 -0800 Subject: [PATCH] feat(metrics): add /debug/tokio/dump endpoint for tokio task dumps (#22737) Co-authored-by: Alexey Shekhirin <5773434+shekhirin@users.noreply.github.com> --- Cargo.toml | 1 + crates/node/metrics/src/server.rs | 37 ++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 79e93eb6a9..12ceb56b73 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,6 +170,7 @@ rust.rust_2018_idioms = { level = "deny", priority = -1 } rust.unreachable_pub = "warn" rust.unused_must_use = "deny" rust.rust_2024_incompatible_pat = "warn" +rust.unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } rustdoc.all = "warn" # rust.unnameable-types = "warn" diff --git a/crates/node/metrics/src/server.rs b/crates/node/metrics/src/server.rs index 23561c575f..56927ece36 100644 --- a/crates/node/metrics/src/server.rs +++ b/crates/node/metrics/src/server.rs @@ -148,8 +148,13 @@ impl MetricServer { let hook = hook.clone(); let pprof_dump_dir = pprof_dump_dir.clone(); let service = tower::service_fn(move |req: Request<_>| { - let response = handle_request(req.uri().path(), &*hook, handle, &pprof_dump_dir); - async move { Ok::<_, Infallible>(response) } + let hook = hook.clone(); + let pprof_dump_dir = pprof_dump_dir.clone(); + async move { + let response = + handle_request(req.uri().path(), &*hook, handle, &pprof_dump_dir).await; + Ok::<_, Infallible>(response) + } }); let mut shutdown = signal.clone().ignore_guard(); @@ -307,7 +312,7 @@ fn describe_io_stats() { #[cfg(not(target_os = "linux"))] const fn describe_io_stats() {} -fn handle_request( +async fn handle_request( path: &str, hook: impl Fn(), handle: &crate::recorder::PrometheusRecorder, @@ -315,6 +320,7 @@ fn handle_request( ) -> Response> { match path { "/debug/pprof/heap" => handle_pprof_heap(pprof_dump_dir), + "/debug/tokio/dump" => handle_tokio_dump().await, _ => { hook(); let metrics = handle.handle().render(); @@ -404,6 +410,31 @@ fn handle_pprof_heap(_pprof_dump_dir: &PathBuf) -> Response> { response } +#[cfg(tokio_unstable)] +async fn handle_tokio_dump() -> Response> { + let handle = tokio::runtime::Handle::current(); + let dump = handle.dump().await; + + let mut output = String::new(); + for (i, task) in dump.tasks().iter().enumerate() { + let trace = task.trace(); + output.push_str(&format!("task {i}:\n{trace}\n\n")); + } + + let mut response = Response::new(Full::new(Bytes::from(output))); + response.headers_mut().insert(CONTENT_TYPE, HeaderValue::from_static("text/plain")); + response +} + +#[cfg(not(tokio_unstable))] +async fn handle_tokio_dump() -> Response> { + let mut response = Response::new(Full::new(Bytes::from_static( + b"tokio task dump not available. Rebuild with RUSTFLAGS=\"--cfg tokio_unstable\" and tokio's `taskdump` feature.", + ))); + *response.status_mut() = StatusCode::NOT_IMPLEMENTED; + response +} + #[cfg(test)] mod tests { use super::*;