mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgrd: fixes to failover handling
get_new_primary() returns NULL if no notification for the new primary has been received, but the code was expecting it to return UNKNOWN_NODE_ID, which was causing repmgrd to prematurely drop out of the new primary detection loop if no notification had been received by the time the loop started. Also store the electoral term as a single row, single column table, to ensure that all repmgrds see the same turn. It is then bumped by the winning node after it gets promoted. Various logging improvements.
This commit is contained in:
88
dbutils.c
88
dbutils.c
@@ -3578,9 +3578,52 @@ delete_monitoring_records(PGconn *primary_conn, int keep_history)
|
||||
/*
|
||||
* node voting functions
|
||||
*
|
||||
* These are intended to run under repmgrd and rely on shared memory
|
||||
* These are intended to run under repmgrd and mainly rely on shared memory
|
||||
*/
|
||||
|
||||
int
|
||||
get_current_term(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
int term = -1;
|
||||
|
||||
res = PQexec(conn, "SELECT term FROM repmgr.voting_term");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to query repmgr.voting_term:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
term = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
PQclear(res);
|
||||
return term;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
increment_current_term(PGconn *conn)
|
||||
{
|
||||
PGresult *res = NULL;
|
||||
|
||||
res = PQexec(conn, "UPDATE repmgr.voting_term SET term = term + 1");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to increment repmgr.voting_term:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
NodeVotingStatus
|
||||
get_voting_status(PGconn *conn)
|
||||
{
|
||||
@@ -3700,19 +3743,26 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
set_voting_status_initiated(PGconn *conn)
|
||||
void
|
||||
set_voting_status_initiated(PGconn *conn, int electoral_term)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
int electoral_term = 0;
|
||||
|
||||
res = PQexec(conn, "SELECT repmgr.set_voting_status_initiated()");
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT repmgr.set_voting_status_initiated(%i)",
|
||||
electoral_term);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
electoral_term = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return electoral_term;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -3748,7 +3798,6 @@ notify_follow_primary(PGconn *conn, int primary_node_id)
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
@@ -3756,10 +3805,17 @@ notify_follow_primary(PGconn *conn, int primary_node_id)
|
||||
primary_node_id);
|
||||
log_verbose(LOG_DEBUG, "notify_follow_primary():\n %s", query.data);
|
||||
|
||||
/* XXX handle failure */
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"),
|
||||
@@ -3786,16 +3842,24 @@ get_new_primary(PGconn *conn, int *primary_node_id)
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
/* XXX handle error */
|
||||
|
||||
new_primary_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute repmgr.reset_voting_status():\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (PQgetisnull(res, 0, 0))
|
||||
{
|
||||
*primary_node_id = UNKNOWN_NODE_ID;
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
new_primary_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
PQclear(res);
|
||||
|
||||
*primary_node_id = new_primary_node_id;
|
||||
|
||||
@@ -474,9 +474,11 @@ bool delete_monitoring_records(PGconn *primary_conn, int keep_history);
|
||||
|
||||
|
||||
/* node voting functions */
|
||||
int get_current_term(PGconn *conn);
|
||||
void increment_current_term(PGconn *conn);
|
||||
NodeVotingStatus get_voting_status(PGconn *conn);
|
||||
VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
|
||||
int set_voting_status_initiated(PGconn *conn);
|
||||
void set_voting_status_initiated(PGconn *conn, int electoral_term);
|
||||
bool announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
|
||||
void notify_follow_primary(PGconn *conn, int primary_node_id);
|
||||
bool get_new_primary(PGconn *conn, int *primary_node_id);
|
||||
|
||||
@@ -79,6 +79,22 @@ LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* XXX update upgrade scripts! */
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
/* XXX do this in "repmgr primary register" */
|
||||
INSERT INTO repmgr.voting_term (term) VALUES (1);
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
@@ -135,8 +151,8 @@ CREATE FUNCTION get_voting_status()
|
||||
AS 'MODULE_PATHNAME', 'get_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_voting_status_initiated()
|
||||
RETURNS INT
|
||||
CREATE FUNCTION set_voting_status_initiated(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_voting_status_initiated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
17
repmgr.c
17
repmgr.c
@@ -427,6 +427,7 @@ get_voting_status(PG_FUNCTION_ARGS)
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
set_voting_status_initiated(PG_FUNCTION_ARGS)
|
||||
{
|
||||
@@ -434,7 +435,12 @@ set_voting_status_initiated(PG_FUNCTION_ARGS)
|
||||
int electoral_term = -1;
|
||||
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
PG_RETURN_VOID();
|
||||
|
||||
if (PG_ARGISNULL(0))
|
||||
PG_RETURN_VOID();
|
||||
|
||||
electoral_term = PG_GETARG_INT32(0);
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
@@ -445,21 +451,18 @@ set_voting_status_initiated(PG_FUNCTION_ARGS)
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
|
||||
shared_state->voting_status = VS_VOTE_INITIATED;
|
||||
shared_state->current_electoral_term += 1;
|
||||
|
||||
electoral_term = shared_state->current_electoral_term;
|
||||
shared_state->current_electoral_term = electoral_term;
|
||||
|
||||
elog(INFO, "setting voting term to %i", electoral_term);
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_INT32(electoral_term);
|
||||
#else
|
||||
PG_RETURN_INT32(-1);
|
||||
#endif
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
other_node_is_candidate(PG_FUNCTION_ARGS)
|
||||
{
|
||||
|
||||
@@ -957,7 +957,14 @@ do_primary_failover(void)
|
||||
}
|
||||
else if (election_result == ELECTION_WON)
|
||||
{
|
||||
log_notice("this node is the winner, will now promote self and inform other nodes");
|
||||
if (standby_nodes.node_count > 0)
|
||||
{
|
||||
log_notice("this node is the winner, will now promote itself and inform other nodes");
|
||||
}
|
||||
else
|
||||
{
|
||||
log_notice("this node is the only available candidate and will now promote itself");
|
||||
}
|
||||
|
||||
failover_state = promote_self();
|
||||
}
|
||||
@@ -1034,7 +1041,7 @@ do_primary_failover(void)
|
||||
*/
|
||||
if (failover_state == FAILOVER_STATE_WAITING_NEW_PRIMARY)
|
||||
{
|
||||
int new_primary_id;
|
||||
int new_primary_id = UNKNOWN_NODE_ID;
|
||||
|
||||
/* TODO: rerun election if new primary doesn't appear after timeout */
|
||||
|
||||
@@ -1563,6 +1570,8 @@ promote_self(void)
|
||||
return FAILOVER_STATE_PROMOTION_FAILED;
|
||||
}
|
||||
|
||||
/* bump the electoral term */
|
||||
increment_current_term(local_conn);
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
@@ -1604,10 +1613,11 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
||||
{
|
||||
NodeInfoListCell *cell;
|
||||
|
||||
log_debug("notify_followers()");
|
||||
log_debug("notify_followers(): %i followers to notify", standby_nodes->node_count);
|
||||
|
||||
for (cell = standby_nodes->head; cell; cell = cell->next)
|
||||
{
|
||||
log_debug("intending to notify node %i... ", cell->node_info->node_id);
|
||||
log_verbose(LOG_DEBUG, "intending to notify node %i... ", cell->node_info->node_id);
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
{
|
||||
log_debug("reconnecting to node %i... ", cell->node_info->node_id);
|
||||
@@ -1925,7 +1935,6 @@ static ElectionResult
|
||||
do_election(void)
|
||||
{
|
||||
int electoral_term = -1;
|
||||
|
||||
int votes_for_me = 0;
|
||||
|
||||
/* we're visible */
|
||||
@@ -1960,6 +1969,17 @@ do_election(void)
|
||||
|
||||
long unsigned rand_wait = (long) ((rand() % 35) + 10) * 10000;
|
||||
|
||||
electoral_term = get_current_term(local_conn);
|
||||
|
||||
if (electoral_term == -1)
|
||||
{
|
||||
log_error(_("unable to determine electoral term"));
|
||||
|
||||
return ELECTION_NOT_CANDIDATE;
|
||||
}
|
||||
|
||||
log_debug("do_election(): electoral term is %i", electoral_term);
|
||||
|
||||
/* get all active nodes attached to primary, excluding self */
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
@@ -2007,7 +2027,7 @@ do_election(void)
|
||||
* so when announcing ourselves as candidate to the other nodes, we'll
|
||||
* check for that and withdraw our candidature.
|
||||
*/
|
||||
electoral_term = set_voting_status_initiated(local_conn);
|
||||
set_voting_status_initiated(local_conn, electoral_term);
|
||||
|
||||
/* no other standbys - normally win by default */
|
||||
if (standby_nodes.node_count == 0)
|
||||
@@ -2065,7 +2085,7 @@ do_election(void)
|
||||
|
||||
/*
|
||||
* see if the node is in the primary's location (but skip the check if
|
||||
* we've seen
|
||||
* we've seen a node there already)
|
||||
*/
|
||||
if (primary_location_seen == false)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user