diff --git a/dbutils.c b/dbutils.c index df833014..8fcba31f 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2622,7 +2622,8 @@ get_voting_status(PGconn *conn) return voting_status; } -int + +VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term) { PQExpBufferData query; @@ -2648,8 +2649,36 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int /* check for NULL */ if (PQgetisnull(res, 0, 0)) { - log_debug("XXX NULL returned by repmgr.request_vote()"); - return 0; + PQclear(res); + + log_debug("NULL returned by repmgr.request_vote()"); + + /* + * get the node's last receive location anyway + * TODO: have repmgr.request_vote() return two values + */ + + initPQExpBuffer(&query); + + appendPQExpBuffer( + &query, +#if (PG_VERSION_NUM >= 100000) + "SELECT pg_catalog.pg_last_wal_receive_lsn()"); +#else + "SELECT pg_catalog.pg_last_xlog_receive_location()"); +#endif + + res = PQexec(conn, query.data); + termPQExpBuffer(&query); + + if (PQresultStatus(res) == PGRES_TUPLES_OK) + { + other_node->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 0)); + } + + PQclear(res); + + return VR_VOTE_REFUSED; } other_node->last_wal_receive_lsn = parse_lsn(PQgetvalue(res, 0, 0)); @@ -2664,7 +2693,7 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int if (lsn_diff > 0) { log_debug("local node is ahead"); - return 1; + return VR_POSITIVE_VOTE; } @@ -2672,7 +2701,7 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int if (lsn_diff < 0) { log_debug("other node is ahead"); - return 0; + return VR_NEGATIVE_VOTE; } /* tiebreak */ @@ -2681,12 +2710,12 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int if (this_node->priority < other_node->priority) { log_debug("other node has higher priority"); - return 0; + return VR_NEGATIVE_VOTE; } /* still tiebreak - we're the candidate, so we win */ log_debug("win by default"); - return 1; + return VR_POSITIVE_VOTE; } diff --git a/dbutils.h b/dbutils.h index 380ab83b..533ce75e 100644 --- a/dbutils.h +++ b/dbutils.h @@ -52,6 +52,12 @@ typedef enum { NODE_STATUS_DOWN } NodeStatus; +typedef enum { + VR_VOTE_REFUSED = -1, + VR_POSITIVE_VOTE, + VR_NEGATIVE_VOTE +} VoteRequestResult; + /* * Struct to store node information */ @@ -310,7 +316,7 @@ bool is_server_available(const char *conninfo); /* node voting functions */ NodeVotingStatus get_voting_status(PGconn *conn); -int request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term); +VoteRequestResult request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term); int set_voting_status_initiated(PGconn *conn); bool announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term); void notify_follow_primary(PGconn *conn, int primary_node_id); diff --git a/repmgr.c b/repmgr.c index 867d40e7..697f178a 100644 --- a/repmgr.c +++ b/repmgr.c @@ -206,13 +206,11 @@ request_vote(PG_FUNCTION_ARGS) initStringInfo(&query); -#if (PG_VERSION_NUM >= 100000) appendStringInfo( &query, +#if (PG_VERSION_NUM >= 100000) "SELECT pg_catalog.pg_last_wal_receive_lsn()"); #else - appendStringInfo( - &query, "SELECT pg_catalog.pg_last_xlog_receive_location()"); #endif diff --git a/repmgrd-physical.c b/repmgrd-physical.c index f8b2463c..7fcd7be6 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -680,7 +680,7 @@ do_primary_failover(void) /* attempt to initiate voting process */ ElectionResult election_result = do_election(); - /* XXX add pre-event notification here */ + /* TODO add pre-event notification here */ failover_state = FAILOVER_STATE_UNKNOWN; log_debug("election result: %s", _print_election_result(election_result)); @@ -702,13 +702,7 @@ do_primary_failover(void) log_info(_("I am the candidate but did not get all votes; will now determine the best candidate")); - - /* reset node list */ - get_active_sibling_node_records(local_conn, - local_node_info.node_id, - upstream_node_info.node_id, - &standby_nodes); - + /* standby_nodes is in the state created by do_election() */ best_candidate = poll_best_candidate(&standby_nodes); /* @@ -1220,12 +1214,14 @@ poll_best_candidate(NodeInfoList *standby_nodes) NodeInfoListCell *cell; t_node_info *best_candidate = &local_node_info; - // XXX ensure standby_nodes is set correctly /* * we need to definitively decide the best candidate, as in some corner * cases we could end up with two candidate nodes, so they should each - * come to the same conclusion + * come to the same conclusion. + * + * XXX check there are no cases where the standby node's LSN is + * not set */ for (cell = standby_nodes->head; cell; cell = cell->next) { @@ -1248,9 +1244,17 @@ poll_best_candidate(NodeInfoList *standby_nodes) log_debug("node %i has lower node_id, now best candidate", cell->node_info->node_id); best_candidate = cell->node_info; } + + if (cell->node_info->conn != NULL && PQstatus(upstream_conn) == CONNECTION_OK) + { + PQfinish(cell->node_info->conn); + cell->node_info->conn = NULL; + } } - log_info(_("best candidate is %i"), best_candidate->node_id); + log_info(_("best candidate is node %s (node ID: %i)"), + best_candidate->node_name, + best_candidate->node_id); return best_candidate; } @@ -1653,23 +1657,51 @@ do_election(void) for (cell = standby_nodes.head; cell; cell = cell->next) { + VoteRequestResult vote_result; + log_debug("checking node %i...", cell->node_info->node_id); /* ignore unreachable nodes */ if (cell->node_info->node_status != NODE_STATUS_UP) continue; - votes_for_me += request_vote(cell->node_info->conn, - &local_node_info, - cell->node_info, - electoral_term); + vote_result = request_vote(cell->node_info->conn, + &local_node_info, + cell->node_info, + electoral_term); + + switch (vote_result) + { + case VR_VOTE_REFUSED: + if (cell->node_info->node_id < local_node_info.node_id) + { + log_debug(_("node %i refused vote, their ID is lower, yielding"), + cell->node_info->node_id); + PQfinish(cell->node_info->conn); + cell->node_info->conn = NULL; + clear_node_info_list(&standby_nodes); + + reset_node_voting_status(); + log_debug("other node is candidate, returning NOT CANDIDATE"); + return ELECTION_NOT_CANDIDATE; + } + + log_debug(_("no vote recevied from %i, our ID is lower, not yielding"), + cell->node_info->node_id); + break; + + case VR_POSITIVE_VOTE: + votes_for_me += 1; + break; + case VR_NEGATIVE_VOTE: + break; + } if (cell->node_info->last_wal_receive_lsn > local_node_info.last_wal_receive_lsn) { /* register if another node is ahead of us */ other_node_is_ahead = true; } - PQfinish(cell->node_info->conn); - cell->node_info->conn = NULL; + } /* vote for myself, but only if I believe no-one else is ahead */ @@ -1722,4 +1754,3 @@ close_connections_physical() } } -