From ef282bd75f9c0f7ca8db1b2a20bdfa628bd6c225 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 12 Dec 2022 23:38:12 +0800 Subject: [PATCH] Fix timing issue in replication test (#11611) There is a timing issue in the test that happens with valgrind: ``` *** [err]: diskless fast replicas drop during rdb pipe in tests/integration/replication.tcl log message of '"*Loading DB in memory*"' not found in ./tests/tmp/server.3580.246/stdout after line: 0 till line: 39 ``` The server logs: ``` 43465:S 03 Dec 2022 01:26:25.664 * Trying a partial resynchronization (request 15155fa24af0539b70428f9b41f4f7129d774560:1). 43465:S 03 Dec 2022 01:26:35.133 * Full resync from master: 8ddf5a3f7c8ca1061c6b29aa84e7c985c5b29c61:680 ``` From the logs, we can see it took almost 10s to get the full resync response under valgrind, which is extremely slow. So I guess it's just an insufficient wait_for_log_messages timeout. Set the timeout to 15s, and modify other similar places at the same time. --- tests/integration/replication.tcl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 4867b5afd2..e98eb8982c 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -857,7 +857,7 @@ start_server {tags {"repl external:skip"}} { # wait for the replicas to start reading the rdb # using the log file since the replica only responds to INFO once in 2mb - wait_for_log_messages -1 {"*Loading DB in memory*"} 0 800 10 + wait_for_log_messages -1 {"*Loading DB in memory*"} 0 1500 10 if {$measure_time} { set master_statfile "/proc/$master_pid/stat" @@ -990,7 +990,7 @@ test "diskless replication child being killed is collected" { $replica replicaof $master_host $master_port # wait for the replicas to start reading the rdb - wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10 + wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 1500 10 # wait to be sure the replica is 
hung and the master is blocked on write after 500 @@ -1075,7 +1075,7 @@ test "diskless replication read pipe cleanup" { $replica replicaof $master_host $master_port # wait for the replicas to start reading the rdb - wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10 + wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 1500 10 set loglines [count_log_lines -1] # send FLUSHALL so the RDB child will be killed