Add functionality to "pause" repmgrd

In some circumstances, e.g. while performing a switchover, it is essential
that repmgrd does not take any kind of failover action, as this will put
the cluster into an incorrect state.

Previously it was necessary to stop repmgrd on all nodes (or at least
those nodes which repmgrd would consider as promotion candidates), however
this is a cumbersome and potentially risk-prone operation, particularly if the
replication cluster contains more than a couple of servers.

To prevent this issue from occurring, this patch introduces the ability
to "pause" repmgrd on all nodes with a single command ("repmgr daemon pause")
which notifies repmgrd not to take any failover action until the node
is "unpaused" ("repmgr daemon unpause").

"repmgr daemon status" provides an overview of each node and whether repmgrd
is running, and if so whether it is paused.

"repmgr standby switchover" has been modified to automatically pause repmgrd
while carrying out the switchover.

See documentation for further details.
This commit is contained in:
Ian Barwick
2018-09-27 16:42:10 +09:00
parent fce3c02760
commit 2491b8ae52
27 changed files with 1943 additions and 121 deletions

130
dbutils.c
View File

@@ -1627,7 +1627,6 @@ repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
}
int
repmgrd_get_local_node_id(PGconn *conn)
{
@@ -1686,6 +1685,135 @@ server_in_exclusive_backup_mode(PGconn *conn)
}
/*
 * repmgrd_set_pid()
 *
 * Execute "SELECT repmgr.set_repmgrd_pid()" over the supplied connection,
 * passing the repmgrd process ID and the path of its pidfile.
 *
 * "pidfile" may be NULL; in that case an SQL NULL is sent as the second
 * argument instead of handing a NULL pointer to a "%s" conversion, which
 * is undefined behaviour.
 * NOTE(review): this assumes repmgr.set_repmgrd_pid() accepts a NULL
 * pidfile argument - confirm against the SQL function definition.
 *
 * Nothing is returned; if the query does not yield PGRES_TUPLES_OK the
 * failure is logged together with the connection's error message.
 */
void
repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile)
{
	PQExpBufferData query;
	PGresult   *res = NULL;

	log_verbose(LOG_DEBUG, "repmgrd_set_pid(): pid is %i", (int) repmgrd_pid);

	initPQExpBuffer(&query);

	appendPQExpBuffer(&query,
					  "SELECT repmgr.set_repmgrd_pid(%i, ",
					  (int) repmgrd_pid);

	if (pidfile != NULL)
	{
		/*
		 * NOTE(review): the path is interpolated without escaping, so a
		 * pidfile path containing a single quote will produce invalid SQL.
		 */
		appendPQExpBuffer(&query,
						  "'%s')",
						  pidfile);
	}
	else
	{
		appendPQExpBuffer(&query,
						  "NULL)");
	}

	res = PQexec(conn, query.data);

	/* the query text is no longer needed once it has been sent */
	termPQExpBuffer(&query);

	if (PQresultStatus(res) != PGRES_TUPLES_OK)
	{
		log_error(_("unable to execute \"SELECT repmgr.set_repmgrd_pid()\""));
		log_detail("%s", PQerrorMessage(conn));
	}

	PQclear(res);

	return;
}
/*
 * repmgrd_get_pid()
 *
 * Execute "SELECT repmgr.get_repmgrd_pid()" over the supplied connection
 * and return the value it reports.
 *
 * Returns UNKNOWN_PID if the query fails (the failure is logged) or if
 * the returned value is NULL.
 */
pid_t
repmgrd_get_pid(PGconn *conn)
{
	pid_t		pid = UNKNOWN_PID;
	PGresult   *result = PQexec(conn, "SELECT repmgr.get_repmgrd_pid()");

	if (PQresultStatus(result) == PGRES_TUPLES_OK)
	{
		/* only convert the value if the function actually returned one */
		if (!PQgetisnull(result, 0, 0))
			pid = atoi(PQgetvalue(result, 0, 0));
	}
	else
	{
		log_error(_("unable to execute \"SELECT repmgr.get_repmgrd_pid()\""));
		log_detail("%s", PQerrorMessage(conn));
	}

	PQclear(result);

	return pid;
}
/*
 * repmgrd_is_running()
 *
 * Execute "SELECT repmgr.repmgrd_is_running()" over the supplied
 * connection and return the boolean it reports.
 *
 * Returns false if the query fails (the failure is logged) or if the
 * returned value is NULL.
 */
bool
repmgrd_is_running(PGconn *conn)
{
	PGresult   *result = PQexec(conn, "SELECT repmgr.repmgrd_is_running()");
	bool		running = false;

	if (PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_running()\""));
		log_detail("%s", PQerrorMessage(conn));

		PQclear(result);
		return false;
	}

	/* a NULL result leaves the default of "false" in place */
	if (!PQgetisnull(result, 0, 0))
		running = atobool(PQgetvalue(result, 0, 0));

	PQclear(result);

	return running;
}
/*
 * repmgrd_is_paused()
 *
 * Execute "SELECT repmgr.repmgrd_is_paused()" over the supplied
 * connection and return the boolean it reports.
 *
 * Returns false if the query fails (the failure is logged) or if the
 * returned value is NULL.
 */
bool
repmgrd_is_paused(PGconn *conn)
{
	bool		paused = false;
	PGresult   *result = NULL;

	result = PQexec(conn, "SELECT repmgr.repmgrd_is_paused()");

	if (PQresultStatus(result) == PGRES_TUPLES_OK)
	{
		/* a NULL result leaves the default of "false" in place */
		if (PQgetisnull(result, 0, 0) == 0)
		{
			paused = atobool(PQgetvalue(result, 0, 0));
		}
	}
	else
	{
		log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_paused()\""));
		log_detail("%s", PQerrorMessage(conn));
	}

	PQclear(result);

	return paused;
}
/*
 * repmgrd_pause()
 *
 * Execute "SELECT repmgr.repmgrd_pause()" over the supplied connection,
 * passing TRUE when "pause" is set and FALSE otherwise.
 *
 * Returns true if the query returned PGRES_TUPLES_OK; otherwise the
 * failure is logged and false is returned.
 */
bool
repmgrd_pause(PGconn *conn, bool pause)
{
	PQExpBufferData sql;
	PGresult   *result = NULL;
	bool		ok;

	initPQExpBuffer(&sql);
	appendPQExpBuffer(&sql,
					  "SELECT repmgr.repmgrd_pause(%s)",
					  pause ? "TRUE" : "FALSE");

	result = PQexec(conn, sql.data);

	/* the query text is no longer needed once it has been sent */
	termPQExpBuffer(&sql);

	ok = (PQresultStatus(result) == PGRES_TUPLES_OK);

	if (!ok)
	{
		log_error(_("unable to execute \"SELECT repmgr.repmgrd_pause()\""));
		log_detail("%s", PQerrorMessage(conn));
	}

	PQclear(result);

	return ok;
}
/* ================ */
/* result functions */
/* ================ */