From 454ebabe89c6a4e75da628d4b4c0874265acf8e0 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Mon, 4 Mar 2019 11:43:54 +0900 Subject: [PATCH] Improve logging when disabling/enabling WAL receiver Also check action is being run on node which is in recovery. --- sysutils.c | 104 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/sysutils.c b/sysutils.c index a0751481..c841ea7d 100644 --- a/sysutils.c +++ b/sysutils.c @@ -184,8 +184,11 @@ pid_t disable_wal_receiver(PGconn *conn) { char buf[MAXLEN]; - int wal_retrieve_retry_interval; + int wal_retrieve_retry_interval, new_wal_retrieve_retry_interval; pid_t wal_receiver_pid = UNKNOWN_PID; + int kill_ret; + int i, j; + int max_retries = 2; if (is_superuser_connection(conn, NULL) == false) { @@ -193,64 +196,70 @@ disable_wal_receiver(PGconn *conn) return UNKNOWN_PID; } - get_pg_setting(conn, "wal_retrieve_retry_interval", buf); - - // XXX handle error - wal_retrieve_retry_interval = atoi(buf); - - - if (wal_retrieve_retry_interval < WALRECEIVER_DISABLE_TIMEOUT_VALUE) + if (get_recovery_type(conn) == RECTYPE_PRIMARY) { - alter_system_int(conn, "wal_retrieve_retry_interval", wal_retrieve_retry_interval + WALRECEIVER_DISABLE_TIMEOUT_VALUE); - pg_reload_conf(conn); + log_error(_("node is not in recovery")); + log_detail(_("wal receiver can only run on standby nodes")); + return UNKNOWN_PID; } wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn); if (wal_receiver_pid == UNKNOWN_PID) { - log_warning(_("unable to retrieve walreceiver PID")); + log_warning(_("unable to retrieve wal receiver PID")); return UNKNOWN_PID; } if (wal_receiver_pid == 0) { - log_warning(_("walreceiver not running")); + log_warning(_("wal receiver not running")); + return UNKNOWN_PID; } - else + + get_pg_setting(conn, "wal_retrieve_retry_interval", buf); + + /* TODO: potentially handle atoi error, though unlikely at this point */ + wal_retrieve_retry_interval = atoi(buf); + + new_wal_retrieve_retry_interval = wal_retrieve_retry_interval + WALRECEIVER_DISABLE_TIMEOUT_VALUE; + + if (wal_retrieve_retry_interval < WALRECEIVER_DISABLE_TIMEOUT_VALUE) { - int kill_ret; - int i, j; - int max_retries = 2; - - for (i = 0; i < max_retries; i++) - { - /* why 5? */ - sleep(5); - log_notice(_("killing walreceiver with PID %i"), (int)wal_receiver_pid); - - kill((int)wal_receiver_pid, SIGTERM); - - for (j = 0; j < 30; j++) - { - kill_ret = kill(wal_receiver_pid, 0); - - if (kill_ret != 0) - { - log_info("killed"); - break; - } - sleep(1); - } - - /* */ - sleep(1); - wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn); - if (wal_receiver_pid == UNKNOWN_PID || wal_receiver_pid == 0) - break; - } + log_notice(_("setting \"wal_retrieve_retry_interval\" to %i milliseconds"), + new_wal_retrieve_retry_interval); + alter_system_int(conn, "wal_retrieve_retry_interval", new_wal_retrieve_retry_interval); + pg_reload_conf(conn); } + /* why 5? */ + log_info(_("sleeping 5 seconds")); + sleep(5); + + for (i = 0; i < max_retries; i++) + { + log_notice(_("killing walreceiver with PID %i"), (int)wal_receiver_pid); + + kill((int)wal_receiver_pid, SIGTERM); + + for (j = 0; j < 30; j++) + { + kill_ret = kill(wal_receiver_pid, 0); + + if (kill_ret != 0) + { + log_info(_("wal receiver with pid %i killed"), (int)wal_receiver_pid); + break; + } + sleep(1); + } + + /* */ + sleep(1); + wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn); + if (wal_receiver_pid == UNKNOWN_PID || wal_receiver_pid == 0) + break; + } return wal_receiver_pid; } @@ -271,6 +280,13 @@ enable_wal_receiver(PGconn *conn) return UNKNOWN_PID; } + if (get_recovery_type(conn) == RECTYPE_PRIMARY) + { + log_error(_("node is not in recovery")); + log_detail(_("wal receiver can only run on standby nodes")); + return UNKNOWN_PID; + } + if (get_pg_setting(conn, "wal_retrieve_retry_interval", buf) == false) { log_error(_("unable to retrieve \"wal_retrieve_retry_interval\"")); @@ -322,5 +338,7 @@ enable_wal_receiver(PGconn *conn) return UNKNOWN_PID; } + log_info(_("WAL receiver started up with PID %i"), (int)wal_receiver_pid); + return wal_receiver_pid; }