From b545252285de7ffe3c67f9507685212dfe444c04 Mon Sep 17 00:00:00 2001 From: Derek Cofausper <256792747+decofe@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:49:02 -0800 Subject: [PATCH] perf(tasks): deprioritize background tracing/OTel threads on Linux (#22692) Co-authored-by: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Co-authored-by: Amp --- Cargo.lock | 1 + Cargo.toml | 1 + crates/ethereum/cli/src/app.rs | 3 ++ crates/tasks/Cargo.toml | 3 ++ crates/tasks/src/utils.rs | 69 ++++++++++++++++++++++++++++++++++ 5 files changed, 77 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 031b75ea69..95408e7cfc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10309,6 +10309,7 @@ dependencies = [ "crossbeam-utils", "dashmap", "futures-util", + "libc", "metrics", "parking_lot", "pin-project", diff --git a/Cargo.toml b/Cargo.toml index 6bf3a96698..e8def7e40f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -518,6 +518,7 @@ humantime = "2.1" humantime-serde = "1.1" itertools = { version = "0.14", default-features = false } linked_hash_set = "0.1" +libc = "0.2" lz4 = "1.28.1" modular-bitfield = "0.13.1" notify = { version = "8.0.0", default-features = false, features = ["macos_fsevent"] } diff --git a/crates/ethereum/cli/src/app.rs b/crates/ethereum/cli/src/app.rs index 30d83a0777..076b7965e0 100644 --- a/crates/ethereum/cli/src/app.rs +++ b/crates/ethereum/cli/src/app.rs @@ -127,6 +127,9 @@ where self.init_tracing(&runner)?; + // Deprioritize background threads spawned by tracing/OTel libraries. 
+ reth_tasks::utils::deprioritize_background_threads(); + // Install the prometheus recorder to be sure to record all metrics install_prometheus_recorder(); diff --git a/crates/tasks/Cargo.toml b/crates/tasks/Cargo.toml index 7e8a929820..ef01fc76cb 100644 --- a/crates/tasks/Cargo.toml +++ b/crates/tasks/Cargo.toml @@ -33,6 +33,9 @@ crossbeam-utils = { workspace = true, optional = true } parking_lot = { workspace = true, optional = true } pin-project = { workspace = true, optional = true } +[target.'cfg(target_os = "linux")'.dependencies] +libc.workspace = true + [dev-dependencies] tokio = { workspace = true, features = ["sync", "rt", "rt-multi-thread", "time", "macros"] } diff --git a/crates/tasks/src/utils.rs b/crates/tasks/src/utils.rs index feabdc7a58..4fd4547119 100644 --- a/crates/tasks/src/utils.rs +++ b/crates/tasks/src/utils.rs @@ -19,3 +19,72 @@ pub fn increase_thread_priority() { } } } + +/// Deprioritizes known background threads spawned by third-party libraries (`OpenTelemetry`, +/// `tracing-appender`, `reqwest`) by scanning `/proc/<pid>/task/` for matching thread names and +/// setting the `SCHED_IDLE` scheduling policy on them. +/// +/// This is a hack: these threads are spawned by libraries that do not expose a way to hook into +/// thread initialization or expose the TIDs, so we have to discover them after the fact by +/// reading `/proc`. +/// +/// Should be called once after tracing is initialized. +/// +/// No-op on non-Linux platforms. +pub fn deprioritize_background_threads() { + #[cfg(target_os = "linux")] + _deprioritize_background_threads(); +} + +/// Thread name prefixes to deprioritize. 
+#[cfg(target_os = "linux")]
+const DEPRIORITIZE_THREAD_PREFIXES: &[&str] =
+    &["OpenTelemetry.T", "tracing-appende", "reqwest-interna"];
+
+#[cfg(target_os = "linux")]
+fn _deprioritize_background_threads() {
+    let pid = std::process::id();
+    let task_dir = format!("/proc/{pid}/task");
+
+    let entries = match std::fs::read_dir(&task_dir) {
+        Ok(entries) => entries,
+        Err(err) => {
+            tracing::debug!(%err, "failed to read /proc task directory");
+            return;
+        }
+    };
+
+    for entry in entries.filter_map(Result::ok) {
+        let tid_str = entry.file_name();
+        let Some(tid_str) = tid_str.to_str() else { continue };
+        let Ok(tid) = tid_str.parse::<libc::pid_t>() else { continue };
+
+        let comm_path = format!("{task_dir}/{tid_str}/comm");
+        let comm = match std::fs::read_to_string(&comm_path) {
+            Ok(c) => c,
+            Err(_) => continue,
+        };
+        let comm = comm.trim();
+
+        if !DEPRIORITIZE_THREAD_PREFIXES.iter().any(|prefix| comm.starts_with(prefix)) {
+            continue;
+        }
+
+        // SCHED_IDLE is the lowest-priority scheduling class. The kernel will only schedule these
+        // threads when no other (SCHED_OTHER/SCHED_BATCH/RT) threads need the CPU.
+        // SAFETY: `param` outlives the call and is only read; a stale TID just yields an error.
+        unsafe {
+            let param = libc::sched_param { sched_priority: 0 };
+            if libc::sched_setscheduler(tid, libc::SCHED_IDLE, std::ptr::from_ref(&param)) != 0 {
+                tracing::debug!(
+                    tid,
+                    comm,
+                    err = std::io::Error::last_os_error().to_string(),
+                    "failed to set SCHED_IDLE"
+                );
+                continue; // the policy was not applied, so skip the success log below
+            }
+        }
+        tracing::debug!(tid, comm, "deprioritized background thread (SCHED_IDLE)");
+    }
+}