diff --git a/dbutils.c b/dbutils.c index 33b9c37d..c7ecef9e 100644 --- a/dbutils.c +++ b/dbutils.c @@ -3578,9 +3578,52 @@ delete_monitoring_records(PGconn *primary_conn, int keep_history) /* * node voting functions * - * These are intended to run under repmgrd and rely on shared memory + * These are intended to run under repmgrd and mainly rely on shared memory */ +int +get_current_term(PGconn *conn) +{ + PGresult *res = NULL; + int term = -1; + + res = PQexec(conn, "SELECT term FROM repmgr.voting_term"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to query repmgr.voting_term:\n %s"), + PQerrorMessage(conn)); + PQclear(res); + return -1; + } + + term = atoi(PQgetvalue(res, 0, 0)); + + PQclear(res); + return term; +} + + +void +increment_current_term(PGconn *conn) +{ + PGresult *res = NULL; + + res = PQexec(conn, "UPDATE repmgr.voting_term SET term = term + 1"); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_error(_("unable to increment repmgr.voting_term:\n %s"), + PQerrorMessage(conn)); + PQclear(res); + return; + } + + PQclear(res); + return; +} + + NodeVotingStatus get_voting_status(PGconn *conn) { @@ -3700,19 +3743,26 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int } -int -set_voting_status_initiated(PGconn *conn) +void +set_voting_status_initiated(PGconn *conn, int electoral_term) { + PQExpBufferData query; PGresult *res = NULL; - int electoral_term = 0; - res = PQexec(conn, "SELECT repmgr.set_voting_status_initiated()"); + initPQExpBuffer(&query); + + appendPQExpBuffer(&query, + "SELECT repmgr.set_voting_status_initiated(%i)", + electoral_term); + + res = PQexec(conn, query.data); + termPQExpBuffer(&query); electoral_term = atoi(PQgetvalue(res, 0, 0)); PQclear(res); - return electoral_term; + return; } @@ -3748,7 +3798,6 @@ notify_follow_primary(PGconn *conn, int primary_node_id) PQExpBufferData query; PGresult *res = NULL; - initPQExpBuffer(&query); appendPQExpBuffer(&query, @@ -3756,10 +3805,17 @@ notify_follow_primary(PGconn *conn, int primary_node_id) primary_node_id); log_verbose(LOG_DEBUG, "notify_follow_primary():\n %s", query.data); - /* XXX handle failure */ res = PQexec(conn, query.data); termPQExpBuffer(&query); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"), + PQerrorMessage(conn)); + PQclear(res); + return; + } + if (PQresultStatus(res) != PGRES_TUPLES_OK) { log_error(_("unable to execute repmgr.notify_follow_primary():\n %s"), @@ -3786,16 +3842,24 @@ get_new_primary(PGconn *conn, int *primary_node_id) res = PQexec(conn, query.data); termPQExpBuffer(&query); - /* XXX handle error */ - new_primary_node_id = atoi(PQgetvalue(res, 0, 0)); - - if (new_primary_node_id == UNKNOWN_NODE_ID) + if (PQresultStatus(res) != PGRES_TUPLES_OK) { + log_error(_("unable to execute repmgr.reset_voting_status():\n %s"), + PQerrorMessage(conn)); PQclear(res); return false; } + if (PQgetisnull(res, 0, 0)) + { + *primary_node_id = UNKNOWN_NODE_ID; + PQclear(res); + return false; + } + + new_primary_node_id = atoi(PQgetvalue(res, 0, 0)); + PQclear(res); *primary_node_id = new_primary_node_id; diff --git a/dbutils.h b/dbutils.h index fbd002cc..c4dc9843 100644 --- a/dbutils.h +++ b/dbutils.h @@ -474,9 +474,11 @@ bool delete_monitoring_records(PGconn *primary_conn, int keep_history); /* node voting functions */ +int get_current_term(PGconn *conn); +void increment_current_term(PGconn *conn); NodeVotingStatus get_voting_status(PGconn *conn); VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term); -int set_voting_status_initiated(PGconn *conn); +void set_voting_status_initiated(PGconn *conn, int electoral_term); bool announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term); void notify_follow_primary(PGconn *conn, int primary_node_id); bool get_new_primary(PGconn *conn, int *primary_node_id); diff --git a/repmgr--4.0.sql b/repmgr--4.0.sql index b84c87f3..c9811f70 100644 --- a/repmgr--4.0.sql +++ b/repmgr--4.0.sql @@ -79,6 +79,22 @@ LEFT JOIN repmgr.nodes un ON un.node_id = n.upstream_node_id; +/* XXX update upgrade scripts! */ +CREATE TABLE repmgr.voting_term ( + term INT NOT NULL +); + +CREATE UNIQUE INDEX voting_term_restrict +ON repmgr.voting_term ((TRUE)); + +CREATE RULE voting_term_delete AS + ON DELETE TO repmgr.voting_term + DO INSTEAD NOTHING; + +/* XXX do this in "repmgr primary register" */ +INSERT INTO repmgr.voting_term (term) VALUES (1); + + /* ================= */ /* repmgrd functions */ /* ================= */ @@ -135,8 +151,8 @@ CREATE FUNCTION get_voting_status() AS 'MODULE_PATHNAME', 'get_voting_status' LANGUAGE C STRICT; -CREATE FUNCTION set_voting_status_initiated() - RETURNS INT +CREATE FUNCTION set_voting_status_initiated(INT) + RETURNS VOID AS 'MODULE_PATHNAME', 'set_voting_status_initiated' LANGUAGE C STRICT; diff --git a/repmgr.c b/repmgr.c index 11632de4..bd25c520 100644 --- a/repmgr.c +++ b/repmgr.c @@ -427,6 +427,7 @@ get_voting_status(PG_FUNCTION_ARGS) #endif } + Datum set_voting_status_initiated(PG_FUNCTION_ARGS) { @@ -434,7 +435,12 @@ set_voting_status_initiated(PG_FUNCTION_ARGS) int electoral_term = -1; if (!shared_state) - PG_RETURN_NULL(); + PG_RETURN_VOID(); + + if (PG_ARGISNULL(0)) + PG_RETURN_VOID(); + + electoral_term = PG_GETARG_INT32(0); LWLockAcquire(shared_state->lock, LW_SHARED); @@ -445,21 +451,18 @@ set_voting_status_initiated(PG_FUNCTION_ARGS) LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); shared_state->voting_status = VS_VOTE_INITIATED; - shared_state->current_electoral_term += 1; - - electoral_term = shared_state->current_electoral_term; + shared_state->current_electoral_term = electoral_term; elog(INFO, "setting voting term to %i", electoral_term); } LWLockRelease(shared_state->lock); - PG_RETURN_INT32(electoral_term); -#else - PG_RETURN_INT32(-1); #endif + PG_RETURN_VOID(); } + Datum other_node_is_candidate(PG_FUNCTION_ARGS) { diff --git a/repmgrd-physical.c b/repmgrd-physical.c index c963f222..4ee9063b 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -957,7 +957,14 @@ do_primary_failover(void) } else if (election_result == ELECTION_WON) { - log_notice("this node is the winner, will now promote self and inform other nodes"); + if (standby_nodes.node_count > 0) + { + log_notice("this node is the winner, will now promote itself and inform other nodes"); + } + else + { + log_notice("this node is the only available candidate and will now promote itself"); + } failover_state = promote_self(); } @@ -1034,7 +1041,7 @@ do_primary_failover(void) */ if (failover_state == FAILOVER_STATE_WAITING_NEW_PRIMARY) { - int new_primary_id; + int new_primary_id = UNKNOWN_NODE_ID; /* TODO: rerun election if new primary doesn't appear after timeout */ @@ -1563,6 +1570,8 @@ promote_self(void) return FAILOVER_STATE_PROMOTION_FAILED; } + /* bump the electoral term */ + increment_current_term(local_conn); initPQExpBuffer(&event_details); @@ -1604,10 +1613,11 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id) { NodeInfoListCell *cell; - log_debug("notify_followers()"); + log_debug("notify_followers(): %i followers to notify", standby_nodes->node_count); + for (cell = standby_nodes->head; cell; cell = cell->next) { - log_debug("intending to notify node %i... ", cell->node_info->node_id); + log_verbose(LOG_DEBUG, "intending to notify node %i... ", cell->node_info->node_id); if (PQstatus(cell->node_info->conn) != CONNECTION_OK) { log_debug("reconnecting to node %i... ", cell->node_info->node_id); @@ -1925,7 +1935,6 @@ static ElectionResult do_election(void) { int electoral_term = -1; - int votes_for_me = 0; /* we're visible */ @@ -1960,6 +1969,17 @@ do_election(void) long unsigned rand_wait = (long) ((rand() % 35) + 10) * 10000; + electoral_term = get_current_term(local_conn); + + if (electoral_term == -1) + { + log_error(_("unable to determine electoral term")); + + return ELECTION_NOT_CANDIDATE; + } + + log_debug("do_election(): electoral term is %i", electoral_term); + /* get all active nodes attached to primary, excluding self */ get_active_sibling_node_records(local_conn, local_node_info.node_id, @@ -2007,7 +2027,7 @@ do_election(void) * so when announcing ourselves as candidate to the other nodes, we'll * check for that and withdraw our candidature. */ - electoral_term = set_voting_status_initiated(local_conn); + set_voting_status_initiated(local_conn, electoral_term); /* no other standbys - normally win by default */ if (standby_nodes.node_count == 0) @@ -2065,7 +2085,7 @@ do_election(void) /* * see if the node is in the primary's location (but skip the check if - * we've seen + * we've seen a node there already) */ if (primary_location_seen == false) {