perf(tasks): deprioritize background tracing/OTel threads on Linux (#22692)

Co-authored-by: DaniPopes <57450786+DaniPopes@users.noreply.github.com>
Co-authored-by: Amp <amp@ampcode.com>
This commit is contained in:
Derek Cofausper
2026-03-01 21:49:02 -08:00
committed by GitHub
parent 6f7c8ad2c9
commit b545252285
5 changed files with 77 additions and 0 deletions

1
Cargo.lock generated
View File

@@ -10309,6 +10309,7 @@ dependencies = [
"crossbeam-utils",
"dashmap",
"futures-util",
"libc",
"metrics",
"parking_lot",
"pin-project",

View File

@@ -518,6 +518,7 @@ humantime = "2.1"
humantime-serde = "1.1"
itertools = { version = "0.14", default-features = false }
linked_hash_set = "0.1"
libc = "0.2"
lz4 = "1.28.1"
modular-bitfield = "0.13.1"
notify = { version = "8.0.0", default-features = false, features = ["macos_fsevent"] }

View File

@@ -127,6 +127,9 @@ where
self.init_tracing(&runner)?;
// Deprioritize background threads spawned by tracing/OTel libraries.
reth_tasks::utils::deprioritize_background_threads();
// Install the prometheus recorder to be sure to record all metrics
install_prometheus_recorder();

View File

@@ -33,6 +33,9 @@ crossbeam-utils = { workspace = true, optional = true }
parking_lot = { workspace = true, optional = true }
pin-project = { workspace = true, optional = true }
[target.'cfg(target_os = "linux")'.dependencies]
libc.workspace = true
[dev-dependencies]
tokio = { workspace = true, features = ["sync", "rt", "rt-multi-thread", "time", "macros"] }

View File

@@ -19,3 +19,72 @@ pub fn increase_thread_priority() {
}
}
}
/// Deprioritizes known background threads spawned by third-party libraries (`OpenTelemetry`,
/// `tracing-appender`, `reqwest`) by scanning `/proc/<pid>/task/` for matching thread names and
/// switching them to the `SCHED_IDLE` scheduling policy.
///
/// This is a hack: these threads are spawned by libraries that do not expose a way to hook into
/// thread initialization or expose the TIDs, so we have to discover them after the fact by
/// reading `/proc`.
///
/// The scan is a one-shot snapshot: only threads that already exist when this function runs are
/// affected, so it should be called once after tracing is initialized.
///
/// This is best-effort. Failures (unreadable `/proc` entries, a rejected scheduler change) are
/// logged at debug level and otherwise ignored; the function never panics or returns an error.
///
/// No-op on non-Linux platforms.
pub fn deprioritize_background_threads() {
    #[cfg(target_os = "linux")]
    _deprioritize_background_threads();
}
/// Prefixes of the thread names (as read from `/proc/<pid>/task/<tid>/comm`) that get
/// deprioritized.
///
/// Every entry is exactly 15 bytes long: Linux truncates thread names to 15 bytes, so the names
/// listed here are the truncated forms that actually show up in `comm`.
#[cfg(target_os = "linux")]
const DEPRIORITIZE_THREAD_PREFIXES: &[&str] = &[
    // Threads spawned by `OpenTelemetry`.
    "OpenTelemetry.T",
    // Threads spawned by `tracing-appender`.
    "tracing-appende",
    // Threads spawned by `reqwest`.
    "reqwest-interna",
];
/// Linux implementation of [`deprioritize_background_threads`].
///
/// Walks `/proc/<pid>/task/`, reads each thread's `comm` (its name) and, for every thread whose
/// name starts with one of [`DEPRIORITIZE_THREAD_PREFIXES`], switches it to `SCHED_IDLE`.
///
/// Best-effort: entries that cannot be read or parsed are skipped silently, and a failed
/// scheduler change is logged at debug level before moving on to the next thread.
#[cfg(target_os = "linux")]
fn _deprioritize_background_threads() {
    let pid = std::process::id();
    let task_dir = format!("/proc/{pid}/task");

    let entries = match std::fs::read_dir(&task_dir) {
        Ok(entries) => entries,
        Err(err) => {
            tracing::debug!(%err, "failed to read /proc task directory");
            return;
        }
    };

    for entry in entries.filter_map(Result::ok) {
        // Each directory entry is named after the numeric TID of one thread of this process.
        let tid_str = entry.file_name();
        let Some(tid_str) = tid_str.to_str() else { continue };
        let Ok(tid) = tid_str.parse::<libc::pid_t>() else { continue };

        // A thread may exit between listing the directory and reading its `comm`; skip it.
        let comm_path = format!("{task_dir}/{tid_str}/comm");
        let Ok(comm) = std::fs::read_to_string(&comm_path) else { continue };
        // `comm` ends with a trailing newline.
        let comm = comm.trim();

        if !DEPRIORITIZE_THREAD_PREFIXES.iter().any(|prefix| comm.starts_with(prefix)) {
            continue;
        }

        // SCHED_IDLE is the lowest-priority scheduling class. The kernel will only schedule these
        // threads when no other (SCHED_OTHER/SCHED_BATCH/RT) threads need the CPU.
        let param = libc::sched_param { sched_priority: 0 };
        // SAFETY: `param` is a live, fully initialized `sched_param` for the duration of the
        // call and the kernel only reads from the pointer. An invalid or stale TID makes the
        // call fail with an error code; it cannot cause memory unsafety.
        let rc = unsafe {
            libc::sched_setscheduler(tid, libc::SCHED_IDLE, std::ptr::from_ref(&param))
        };

        if rc != 0 {
            // Capture errno right away, before any other call can overwrite it.
            let err = std::io::Error::last_os_error();
            tracing::debug!(tid, comm, %err, "failed to set SCHED_IDLE");
            // Do not fall through to the success log for a thread that was not changed.
            continue;
        }

        tracing::debug!(tid, comm, "deprioritized background thread (SCHED_IDLE)");
    }
}