From fa86fe4ad8814702ec4d35f842d2810a42973d85 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 29 Jun 2017 01:11:21 +0900 Subject: [PATCH] Basic voting --- dbutils.c | 30 ++++++++++++++------ dbutils.h | 1 + repmgr--4.0.sql | 5 ++++ repmgr.c | 27 ++++++++++++++++++ repmgrd.c | 75 +++++++++++++++++++++++++++++++++++++++++-------- voting.h | 3 +- 6 files changed, 121 insertions(+), 20 deletions(-) diff --git a/dbutils.c b/dbutils.c index 4a23a577..ee20e126 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2307,6 +2307,13 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, XLog termPQExpBuffer(&query); + // check for NULL + if (PQgetisnull(res, 0, 0)) + { + log_debug("XXX NULL returned by repmgr.request_vote()"); + return 0; + } + lsn_diff = atoi(PQgetvalue(res, 0, 0)); log_debug("XXX lsn_diff %i", lsn_diff); @@ -2337,16 +2344,23 @@ request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, XLog } /* still tiebreak - decide by node_id */ - if (this_node->node_id < other_node->node_id) - { - log_debug("this node has lower id"); - return 1; - } - log_debug("other node wins"); + + // we're the candidate, so we win + log_debug("win by default"); + return 1; + +} - /* we lose */ - return 0; +void +set_voting_status_initiated(PGconn *conn) +{ + PGresult *res; + + res = PQexec(conn, "SELECT repmgr.set_voting_status_initiated()"); + + PQclear(res); + return; } diff --git a/dbutils.h b/dbutils.h index b0299a91..745d4012 100644 --- a/dbutils.h +++ b/dbutils.h @@ -247,6 +247,7 @@ bool is_server_available(const char *conninfo); /* node voting functions */ NodeVotingStatus get_voting_status(PGconn *conn); int request_vote(PGconn *conn, t_node_info *this_node, t_node_info *other_node, XLogRecPtr last_wal_receive_lsn); +void set_voting_status_initiated(PGconn *conn); /* replication status functions */ diff --git a/repmgr--4.0.sql b/repmgr--4.0.sql index 56937266..3f2b2447 100644 --- a/repmgr--4.0.sql +++ b/repmgr--4.0.sql @@ -46,3 +46,8 @@ CREATE FUNCTION get_voting_status() RETURNS INT AS '$libdir/repmgr', 'get_voting_status' LANGUAGE C STRICT; + +CREATE FUNCTION set_voting_status_initiated() + RETURNS VOID + AS '$libdir/repmgr', 'set_voting_status_initiated' + LANGUAGE C STRICT; diff --git a/repmgr.c b/repmgr.c index e1a4b022..e5ed0c1e 100644 --- a/repmgr.c +++ b/repmgr.c @@ -64,6 +64,8 @@ PG_FUNCTION_INFO_V1(request_vote); Datum get_voting_status(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(get_voting_status); +Datum set_voting_status_initiated(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(set_voting_status_initiated); /* * Module load callback @@ -155,6 +157,16 @@ request_vote(PG_FUNCTION_ARGS) int lsn_diff; + NodeVotingStatus voting_status; + + LWLockAcquire(shared_state->lock, LW_SHARED); + voting_status = shared_state->voting_status; + LWLockRelease(shared_state->lock); + + /* this node has initiated voting or already responded to another node */ + if (voting_status != VS_NO_VOTE) + PG_RETURN_NULL(); + elog(INFO, "id is %i, lsn: %X/%X", requesting_node_id, (uint32) (requesting_node_last_lsn >> 32), @@ -182,6 +194,11 @@ request_vote(PG_FUNCTION_ARGS) SPI_finish(); + /* indicate this node has responded to a vote request */ + LWLockAcquire(shared_state->lock, LW_SHARED); + shared_state->voting_status = VS_VOTE_REQUEST_RECEIVED; + LWLockRelease(shared_state->lock); + PG_RETURN_INT32(lsn_diff); } @@ -198,3 +215,13 @@ get_voting_status(PG_FUNCTION_ARGS) PG_RETURN_INT32(voting_status); } + +Datum +set_voting_status_initiated(PG_FUNCTION_ARGS) +{ + LWLockAcquire(shared_state->lock, LW_SHARED); + shared_state->voting_status = VS_VOTE_INITIATED; + LWLockRelease(shared_state->lock); + + PG_RETURN_VOID(); +} diff --git a/repmgrd.c b/repmgrd.c index 926e8d06..05a83bff 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -6,7 +6,9 @@ #include #include +#include #include +#include #include "repmgr.h" #include "config.h" @@ -103,6 +105,8 @@ main(int argc, char **argv) set_progname(argv[0]); + srand ( time(NULL) ); + /* Disallow running as root */ if (geteuid() == 0) { @@ -530,7 +534,32 @@ monitor_streaming_standby(void) /* still down after reconnect attempt(s) - */ if (upstream_node_status == NODE_STATUS_DOWN) { - do_election(); + NodeVotingStatus voting_status = do_election(); + + switch(voting_status) + { + case VS_NO_VOTE: + log_info("NO VOTE"); + break; + case VS_VOTE_REQUEST_RECEIVED: + log_info("VOTE REQUEST RECEIVED"); + break; + case VS_VOTE_INITIATED: + log_info("VOTE REQUEST INITIATED"); + break; + + case VS_VOTE_WON: + log_info("VOTE REQUEST WON"); + break; + case VS_VOTE_LOST: + log_info("VOTE REQUEST LOST"); + break; + + case VS_UNKNOWN: + log_info("VOTE REQUEST UNKNOWN"); + break; + } + // begin voting process // if VS_PROMOTION_CANDIDATE @@ -551,9 +580,8 @@ monitor_streaming_standby(void) static NodeVotingStatus do_election(void) { - int total_eligible_nodes = 0; - /* current node votes for itself by default */ - int votes_for_me = 1; +// int total_eligible_nodes = 0; + int votes_for_me = 0; /* we're visible */ int visible_nodes = 1; @@ -568,15 +596,27 @@ do_election(void) NodeInfoList standby_nodes = T_NODE_INFO_LIST_INITIALIZER; NodeInfoListCell *cell; + long unsigned rand_wait = (long) ((rand() % 50) + 10) * 10000; + + log_debug("do_election(): sleeping %li", rand_wait); + + pg_usleep(rand_wait); + + voting_status = get_voting_status(local_conn); log_debug("do_election(): node voting status is %i", (int)voting_status); if (voting_status == VS_VOTE_REQUEST_RECEIVED) { + log_debug("vote request already received, not candidate"); /* we've already been requested to vote, so can't become a candidate */ return voting_status; } + // XXX should we mark ourselves as candidate? + // -> so any further vote requests are rejected? + set_voting_status_initiated(local_conn); + /* get all active nodes attached to primary, excluding self */ // XXX include barman node in results @@ -612,15 +652,25 @@ do_election(void) // XXX check if > 50% visible - /* check if we've been asked to vote again */ - // XXX do that + /* check again if we've been asked to vote */ - // XXX should we mark ourselves as candidate? - // -> so any further vote requests are rejected? +if (0) +{ + voting_status = get_voting_status(local_conn); + log_debug("do_election(): node voting status is %i", (int)voting_status); + if (voting_status == VS_VOTE_REQUEST_RECEIVED) + { + /* we've already been requested to vote, so can't become a candidate */ + return voting_status; + } +} + + /* current node votes for itself by default */ + votes_for_me += 1; + /* get our lsn*/ - last_wal_receive_lsn = get_last_wal_receive_location(local_conn); log_debug("LAST receive lsn = %X/%X", (uint32) (last_wal_receive_lsn >> 32), (uint32) last_wal_receive_lsn); @@ -643,7 +693,10 @@ do_election(void) log_notice(_("%i of of %i votes"), votes_for_me, visible_nodes); - return VS_VOTE_WON; + if (votes_for_me == visible_nodes) + return VS_VOTE_WON; + + return VS_VOTE_LOST; } static void @@ -846,7 +899,7 @@ try_reconnect(const char *conninfo, NodeStatus *node_status) int i; // XXX make this all configurable - int max_attempts = 15; + int max_attempts = 10; for (i = 0; i < max_attempts; i++) { diff --git a/voting.h b/voting.h index 5ddd4a08..abad4386 100644 --- a/voting.h +++ b/voting.h @@ -10,7 +10,8 @@ typedef enum { VS_NO_VOTE, VS_VOTE_REQUEST_RECEIVED, VS_VOTE_INITIATED, - VS_VOTE_WON + VS_VOTE_WON, + VS_VOTE_LOST } NodeVotingStatus; #endif /* _VOTING_H_ */