From 39f0a33f78bb61d8afb3d08b770247ad1673a1ea Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Jan 2013 10:29:14 +0100 Subject: [PATCH] Whitelist SIGUSR1 to avoid auto-triggering errors. This commit fixes issue #875 that was caused by the following events: 1) There is an active child doing BGSAVE. 2) flushall is called (or any other condition that makes Redis killing the saving child process). 3) An error is sensed by Redis as the child exited with an error (killed by a singal), that stops accepting write commands until a BGSAVE happens to be executed with success. Whitelisting SIGUSR1 and making sure Redis always uses this signal in order to kill its own children fixes the issue. --- src/aof.c | 2 +- src/db.c | 2 +- src/rdb.c | 5 ++++- src/redis.c | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index 7e1512ebdb..8dc1dd1881 100644 --- a/src/aof.c +++ b/src/aof.c @@ -177,7 +177,7 @@ void stopAppendOnly(void) { redisLog(REDIS_NOTICE,"Killing running AOF rewrite child: %ld", (long) server.aof_child_pid); - if (kill(server.aof_child_pid,SIGKILL) != -1) + if (kill(server.aof_child_pid,SIGUSR1) != -1) wait3(&statloc,0,NULL); /* reset the buffer accumulating changes while the child saves */ aofRewriteBufferReset(); diff --git a/src/db.c b/src/db.c index 1cb89e3c2b..a7b196f5a8 100644 --- a/src/db.c +++ b/src/db.c @@ -219,7 +219,7 @@ void flushallCommand(redisClient *c) { server.dirty += emptyDb(); addReply(c,shared.ok); if (server.rdb_child_pid != -1) { - kill(server.rdb_child_pid,SIGKILL); + kill(server.rdb_child_pid,SIGUSR1); rdbRemoveTempFile(server.rdb_child_pid); } if (server.saveparamslen > 0) { diff --git a/src/rdb.c b/src/rdb.c index 17f2ef840e..a5e4c47af4 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1194,7 +1194,10 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { redisLog(REDIS_WARNING, "Background saving terminated by signal %d", bysignal); rdbRemoveTempFile(server.rdb_child_pid); - server.lastbgsave_status = REDIS_ERR; + /* SIGUSR1 is whitelisted, so we have a way to kill a child without + * tirggering an error conditon. */ + if (bysignal != SIGUSR1) + server.lastbgsave_status = REDIS_ERR; } server.rdb_child_pid = -1; server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start; diff --git a/src/redis.c b/src/redis.c index 00774bae31..cb78de7ed7 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1695,7 +1695,7 @@ int prepareForShutdown(int flags) { overwrite the synchronous saving did by SHUTDOWN. */ if (server.rdb_child_pid != -1) { redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!"); - kill(server.rdb_child_pid,SIGKILL); + kill(server.rdb_child_pid,SIGUSR1); rdbRemoveTempFile(server.rdb_child_pid); } if (server.aof_state != REDIS_AOF_OFF) { @@ -1704,7 +1704,7 @@ int prepareForShutdown(int flags) { if (server.aof_child_pid != -1) { redisLog(REDIS_WARNING, "There is a child rewriting the AOF. Killing it!"); - kill(server.aof_child_pid,SIGKILL); + kill(server.aof_child_pid,SIGUSR1); } /* Append only file: fsync() the AOF and exit */ redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");