From 665e4284356dad675baefd4aa3246094def3abc6 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 26 Oct 2021 08:34:30 +0300 Subject: [PATCH] Testsuite: attempt to find / avoid valgrind warnings of killed processes (#9679) I recently started seeing a lot of empty valgrind reports in the daily CI. i.e. prints showing valgrind header but no leak report, which causes the tests to fail https://github.com/redis/redis/runs/3991335416?check_suite_focus=true This commit changes 2 things: * first, considering valgrind is just slow, we used to give processes 60 seconds timeout on shutdown instead of the 10 seconds we give normally. this commit changes that to 120. * secondly, when we reach the timeout, we first try to use SIGSEGV so that maybe we'll get a stack trace indicating where redis is hung, and we only resort to SIGKILL if double that time has passed. note that if there are indeed hung processes, we will normally not see that in the non-valgrind runs, since the tests didn't use to detect any failure in that case, and now they will since `crashlog_from_file` is run after `kill_server`. --- tests/instances.tcl | 7 +++++-- tests/support/server.tcl | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index 1395efa0c7..22d3bc4759 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -196,14 +196,17 @@ proc stop_instance pid { # Node might have been stopped in the test catch {exec kill -SIGCONT $pid} if {$::valgrind} { - set max_wait 60000 + set max_wait 120000 } else { set max_wait 10000 } while {[is_alive $pid]} { incr wait 10 - if {$wait >= $max_wait} { + if {$wait == $max_wait} { + puts "Forcing process $pid to crash..." + catch {exec kill -SEGV $pid} + } elseif {$wait >= $max_wait * 2} { puts "Forcing process $pid to exit..."
catch {exec kill -KILL $pid} } elseif {$wait % 1000 == 0} { diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 0563995fd7..c57fad5419 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -80,14 +80,17 @@ proc kill_server config { # Node might have been stopped in the test catch {exec kill -SIGCONT $pid} if {$::valgrind} { - set max_wait 60000 + set max_wait 120000 } else { set max_wait 10000 } while {[is_alive $config]} { incr wait 10 - if {$wait >= $max_wait} { + if {$wait == $max_wait} { + puts "Forcing process $pid to crash..." + catch {exec kill -SEGV $pid} + } elseif {$wait >= $max_wait * 2} { puts "Forcing process $pid to exit..." catch {exec kill -KILL $pid} } elseif {$wait % 1000 == 0} {