mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
Improve logging when disabling/enabling WAL receiver
Also check that the action is being run on a node which is in recovery.
This commit is contained in:
104
sysutils.c
104
sysutils.c
@@ -184,8 +184,11 @@ pid_t
|
|||||||
disable_wal_receiver(PGconn *conn)
|
disable_wal_receiver(PGconn *conn)
|
||||||
{
|
{
|
||||||
char buf[MAXLEN];
|
char buf[MAXLEN];
|
||||||
int wal_retrieve_retry_interval;
|
int wal_retrieve_retry_interval, new_wal_retrieve_retry_interval;
|
||||||
pid_t wal_receiver_pid = UNKNOWN_PID;
|
pid_t wal_receiver_pid = UNKNOWN_PID;
|
||||||
|
int kill_ret;
|
||||||
|
int i, j;
|
||||||
|
int max_retries = 2;
|
||||||
|
|
||||||
if (is_superuser_connection(conn, NULL) == false)
|
if (is_superuser_connection(conn, NULL) == false)
|
||||||
{
|
{
|
||||||
@@ -193,64 +196,70 @@ disable_wal_receiver(PGconn *conn)
|
|||||||
return UNKNOWN_PID;
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
|
|
||||||
get_pg_setting(conn, "wal_retrieve_retry_interval", buf);
|
if (get_recovery_type(conn) == RECTYPE_PRIMARY)
|
||||||
|
|
||||||
// XXX handle error
|
|
||||||
wal_retrieve_retry_interval = atoi(buf);
|
|
||||||
|
|
||||||
|
|
||||||
if (wal_retrieve_retry_interval < WALRECEIVER_DISABLE_TIMEOUT_VALUE)
|
|
||||||
{
|
{
|
||||||
alter_system_int(conn, "wal_retrieve_retry_interval", wal_retrieve_retry_interval + WALRECEIVER_DISABLE_TIMEOUT_VALUE);
|
log_error(_("node is not in recovery"));
|
||||||
pg_reload_conf(conn);
|
log_detail(_("wal receiver can only run on standby nodes"));
|
||||||
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
|
|
||||||
wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn);
|
wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn);
|
||||||
|
|
||||||
if (wal_receiver_pid == UNKNOWN_PID)
|
if (wal_receiver_pid == UNKNOWN_PID)
|
||||||
{
|
{
|
||||||
log_warning(_("unable to retrieve walreceiver PID"));
|
log_warning(_("unable to retrieve wal receiver PID"));
|
||||||
return UNKNOWN_PID;
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wal_receiver_pid == 0)
|
if (wal_receiver_pid == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("walreceiver not running"));
|
log_warning(_("wal receiver not running"));
|
||||||
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
get_pg_setting(conn, "wal_retrieve_retry_interval", buf);
|
||||||
|
|
||||||
|
/* TODO: potentially handle atoi error, though unlikely at this point */
|
||||||
|
wal_retrieve_retry_interval = atoi(buf);
|
||||||
|
|
||||||
|
new_wal_retrieve_retry_interval = wal_retrieve_retry_interval + WALRECEIVER_DISABLE_TIMEOUT_VALUE;
|
||||||
|
|
||||||
|
if (wal_retrieve_retry_interval < WALRECEIVER_DISABLE_TIMEOUT_VALUE)
|
||||||
{
|
{
|
||||||
int kill_ret;
|
log_notice(_("setting \"wal_retrieve_retry_interval\" to %i milliseconds"),
|
||||||
int i, j;
|
new_wal_retrieve_retry_interval);
|
||||||
int max_retries = 2;
|
alter_system_int(conn, "wal_retrieve_retry_interval", new_wal_retrieve_retry_interval);
|
||||||
|
pg_reload_conf(conn);
|
||||||
for (i = 0; i < max_retries; i++)
|
|
||||||
{
|
|
||||||
/* why 5? */
|
|
||||||
sleep(5);
|
|
||||||
log_notice(_("killing walreceiver with PID %i"), (int)wal_receiver_pid);
|
|
||||||
|
|
||||||
kill((int)wal_receiver_pid, SIGTERM);
|
|
||||||
|
|
||||||
for (j = 0; j < 30; j++)
|
|
||||||
{
|
|
||||||
kill_ret = kill(wal_receiver_pid, 0);
|
|
||||||
|
|
||||||
if (kill_ret != 0)
|
|
||||||
{
|
|
||||||
log_info("killed");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sleep(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* */
|
|
||||||
sleep(1);
|
|
||||||
wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn);
|
|
||||||
if (wal_receiver_pid == UNKNOWN_PID || wal_receiver_pid == 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* why 5? */
|
||||||
|
log_info(_("sleeping 5 seconds"));
|
||||||
|
sleep(5);
|
||||||
|
|
||||||
|
for (i = 0; i < max_retries; i++)
|
||||||
|
{
|
||||||
|
log_notice(_("killing walreceiver with PID %i"), (int)wal_receiver_pid);
|
||||||
|
|
||||||
|
kill((int)wal_receiver_pid, SIGTERM);
|
||||||
|
|
||||||
|
for (j = 0; j < 30; j++)
|
||||||
|
{
|
||||||
|
kill_ret = kill(wal_receiver_pid, 0);
|
||||||
|
|
||||||
|
if (kill_ret != 0)
|
||||||
|
{
|
||||||
|
log_info(_("wal receiver with pid %i killed"), (int)wal_receiver_pid);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* */
|
||||||
|
sleep(1);
|
||||||
|
wal_receiver_pid = (pid_t)get_wal_receiver_pid(conn);
|
||||||
|
if (wal_receiver_pid == UNKNOWN_PID || wal_receiver_pid == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return wal_receiver_pid;
|
return wal_receiver_pid;
|
||||||
}
|
}
|
||||||
@@ -271,6 +280,13 @@ enable_wal_receiver(PGconn *conn)
|
|||||||
return UNKNOWN_PID;
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (get_recovery_type(conn) == RECTYPE_PRIMARY)
|
||||||
|
{
|
||||||
|
log_error(_("node is not in recovery"));
|
||||||
|
log_detail(_("wal receiver can only run on standby nodes"));
|
||||||
|
return UNKNOWN_PID;
|
||||||
|
}
|
||||||
|
|
||||||
if (get_pg_setting(conn, "wal_retrieve_retry_interval", buf) == false)
|
if (get_pg_setting(conn, "wal_retrieve_retry_interval", buf) == false)
|
||||||
{
|
{
|
||||||
log_error(_("unable to retrieve \"wal_retrieve_retry_interval\""));
|
log_error(_("unable to retrieve \"wal_retrieve_retry_interval\""));
|
||||||
@@ -322,5 +338,7 @@ enable_wal_receiver(PGconn *conn)
|
|||||||
return UNKNOWN_PID;
|
return UNKNOWN_PID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_info(_("WAL receiver started up with PID %i"), (int)wal_receiver_pid);
|
||||||
|
|
||||||
return wal_receiver_pid;
|
return wal_receiver_pid;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user