From ae8171e46170d6c5c796d94a28b72fa344c62555 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Mar 2019 14:19:31 +0900 Subject: [PATCH] Improve logging/sanity checking for "node control" options --- repmgr-client.c | 22 ++++++++++++++++++++++ sysutils.c | 29 ++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/repmgr-client.c b/repmgr-client.c index 33091274..84c13801 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -1922,6 +1922,28 @@ check_cli_parameters(const int action) action_name(action)); } } + + /* --disable-wal-receiver / --enable-wal-receiver */ + if (runtime_options.disable_wal_receiver == true || runtime_options.enable_wal_receiver == true) + { + switch (action) + { + case NODE_CONTROL: + { + if (runtime_options.disable_wal_receiver == true && runtime_options.enable_wal_receiver == true) + { + item_list_append(&cli_errors, + _("provide either --disable-wal-receiver or --enable-wal-receiver")); + } + } + break; + default: + item_list_append_format(&cli_warnings, + _("--disable-wal-receiver / --enable-wal-receiver not effective when executing %s"), + action_name(action)); + } + } + } diff --git a/sysutils.c b/sysutils.c index c841ea7d..a1473fe8 100644 --- a/sysutils.c +++ b/sysutils.c @@ -211,12 +211,6 @@ disable_wal_receiver(PGconn *conn) return UNKNOWN_PID; } - if (wal_receiver_pid == 0) - { - log_warning(_("wal receiver not running")); - return UNKNOWN_PID; - } - get_pg_setting(conn, "wal_retrieve_retry_interval", buf); /* TODO: potentially handle atoi error, though unlikely at this point */ @@ -232,13 +226,25 @@ disable_wal_receiver(PGconn *conn) pg_reload_conf(conn); } + /* + * If, at this point, the WAL receiver is not running, we don't need to (and indeed can't) + * kill it. + */ + if (wal_receiver_pid == 0) + { + log_warning(_("wal receiver not running")); + return UNKNOWN_PID; + } + + /* why 5? */ log_info(_("sleeping 5 seconds")); sleep(5); + /* see comment below as to why we need a loop here */ for (i = 0; i < max_retries; i++) { - log_notice(_("killing walreceiver with PID %i"), (int)wal_receiver_pid); + log_notice(_("killing WAL receiver with PID %i"), (int)wal_receiver_pid); kill((int)wal_receiver_pid, SIGTERM); @@ -248,13 +254,18 @@ disable_wal_receiver(PGconn *conn) if (kill_ret != 0) { - log_info(_("wal receiver with pid %i killed"), (int)wal_receiver_pid); + log_info(_("WAL receiver with pid %i killed"), (int)wal_receiver_pid); break; } sleep(1); } - /* */ + /* + * Wait briefly to check that the WAL receiver has indeed gone away - + * for reasons as yet unclear, after a server start/restart, immediately + * after the first time a WAL receiver is killed, a new one is started + * straight away, so we'll need to kill that too. + */ sleep(1); wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn); if (wal_receiver_pid == UNKNOWN_PID || wal_receiver_pid == 0)