repmgrd: various fixed, mainly clearing status after a failover event

This commit is contained in:
Ian Barwick
2017-07-04 11:55:03 +09:00
parent 78d45ebf68
commit 618a2346e1
7 changed files with 242 additions and 49 deletions

View File

@@ -268,9 +268,14 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
options->event_notifications.tail = NULL;
/* barman settings */
/* --------------- */
memset(options->barman_server, 0, sizeof(options->barman_server));
memset(options->barman_config, 0, sizeof(options->barman_config));
/* undocumented test settings */
/* -------------------------- */
options->promote_delay = 0;
/*
* If no configuration file available (user didn't specify and none found
* in the default locations), return with default values
@@ -455,6 +460,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
else if (strcmp(name, "barman_config") == 0)
strncpy(options->barman_config, value, MAXLEN);
/* undocumented test settings */
else if (strcmp(name, "promote_delay") == 0)
options->promote_delay = repmgr_atoi(value, name, error_list, 1);
/* Following parameters have been deprecated or renamed from 3.x - issue a warning */
else if (strcmp(name, "cluster") == 0)
{

View File

@@ -106,6 +106,9 @@ typedef struct
char barman_host[MAXLEN];
char barman_server[MAXLEN];
char barman_config[MAXLEN];
/* undocumented test settings */
int promote_delay;
} t_configuration_options;
/*
@@ -131,7 +134,10 @@ typedef struct
/* bdr settings */ \
BDR_MONITORING_LOCAL, \
/* barman settings */ \
"", "", "" }
"", "", "", \
/* undocumented test settings */ \
0 \
}

View File

@@ -1714,6 +1714,8 @@ clear_node_info_list(NodeInfoList *nodes)
NodeInfoListCell *cell;
NodeInfoListCell *next_cell;
log_debug("clear_node_info_list() - closing open connections");
/* close any open connections */
for (cell = nodes->head; cell; cell = cell->next)
{
@@ -1724,6 +1726,8 @@ clear_node_info_list(NodeInfoList *nodes)
}
}
log_debug("clear_node_info_list() - unlinking");
cell = nodes->head;
while (cell != NULL)
@@ -1733,7 +1737,8 @@ clear_node_info_list(NodeInfoList *nodes)
pfree(cell);
cell = next_cell;
}
nodes->head = NULL;
nodes->tail = NULL;
nodes->node_count = 0;
}
@@ -2493,6 +2498,32 @@ get_new_primary(PGconn *conn, int *primary_node_id)
}
void
reset_voting_status(PGconn *conn)
{
PQExpBufferData query;
PGresult *res;
initPQExpBuffer(&query);
appendPQExpBuffer(&query,
"SELECT repmgr.reset_voting_status()");
res = PQexec(conn, query.data);
termPQExpBuffer(&query);
// COMMAND_OK?
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_error(_("unable to execute repmgr..reset_voting_status():\n %s"),
PQerrorMessage(conn));
}
PQclear(res);
return;
}
/* ============================ */
/* replication status functions */
/* ============================ */

View File

@@ -249,6 +249,7 @@ int set_voting_status_initiated(PGconn *conn);
bool announce_candidature(PGconn *conn, t_node_info *this_node, t_node_info *other_node, int electoral_term);
void notify_follow_primary(PGconn *conn, int primary_node_id);
bool get_new_primary(PGconn *conn, int *primary_node_id);
void reset_voting_status(PGconn *conn);
/* replication status functions */

View File

@@ -65,3 +65,8 @@ CREATE FUNCTION get_new_primary()
RETURNS INT
AS '$libdir/repmgr', 'get_new_primary'
LANGUAGE C STRICT;
CREATE FUNCTION reset_voting_status()
RETURNS VOID
AS '$libdir/repmgr', 'reset_voting_status'
LANGUAGE C STRICT;

View File

@@ -80,8 +80,8 @@ PG_FUNCTION_INFO_V1(notify_follow_primary);
Datum get_new_primary(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(get_new_primary);
//Datum set_new_primary(PG_FUNCTION_ARGS);
//PG_FUNCTION_INFO_V1(set_new_primary);
Datum reset_voting_status(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(reset_voting_status);
/*
* Module load callback
@@ -153,9 +153,9 @@ repmgr_shmem_startup(void)
shared_state->lock = LWLockAssign();
#endif
shared_state->current_electoral_term = 0;
shared_state->voting_status = VS_NO_VOTE;
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
shared_state->current_electoral_term = 0;
shared_state->follow_new_primary = false;
}
@@ -315,3 +315,18 @@ get_new_primary(PG_FUNCTION_ARGS)
PG_RETURN_INT32(new_primary_node_id);
}
Datum
reset_voting_status(PG_FUNCTION_ARGS)
{
LWLockAcquire(shared_state->lock, LW_SHARED);
shared_state->voting_status = VS_NO_VOTE;
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
shared_state->follow_new_primary = false;
LWLockRelease(shared_state->lock);
PG_RETURN_VOID();
}

214
repmgrd.c
View File

@@ -33,9 +33,11 @@ typedef enum {
FAILOVER_STATE_PRIMARY_REAPPEARED,
FAILOVER_STATE_LOCAL_NODE_FAILURE,
FAILOVER_STATE_WAITING_NEW_PRIMARY,
FAILOVER_STATE_NO_NEW_PRIMARY,
FAILOVER_STATE_FOLLOWED_NEW_PRIMARY,
FAILOVER_STATE_FOLLOW_FAIL
FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY,
FAILOVER_STATE_NO_NEW_PRIMARY,
FAILOVER_STATE_FOLLOW_FAIL,
FAILOVER_STATE_NODE_NOTIFICATION_ERROR
} FailoverState;
@@ -96,13 +98,15 @@ static const char *_print_voting_status(NodeVotingStatus voting_status);
static const char *_print_election_result(ElectionResult result);
static FailoverState promote_self(void);
static void notify_followers(NodeInfoList *standby_nodes);
static void notify_followers(NodeInfoList *standby_nodes, int follow_node_id);
static t_node_info *poll_best_candidate(NodeInfoList *standby_nodes);
static bool wait_primary_notification(int *new_primary_id);
static FailoverState follow_new_primary(int new_primary_id);
static void reset_node_voting_status(void);
static void close_connections();
static void terminate(int retval);
@@ -436,10 +440,10 @@ start_monitoring(void)
local_node_info.node_name,
local_node_info.node_id);
failover_state = FAILOVER_STATE_NONE;
while(true)
{
reset_node_voting_status();
switch (local_node_info.type)
{
case PRIMARY:
@@ -471,17 +475,26 @@ monitor_streaming_primary(void)
/* Log startup event */
if (startup_event_logged == false)
{
PQExpBufferData event_details;
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("monitoring cluster primary \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id);
create_event_record(local_conn,
&config_file_options,
config_file_options.node_id,
"repmgrd_start",
true,
NULL);
event_details.data);
startup_event_logged = true;
log_notice(_("monitoring cluster primary \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id);
log_notice("%s", event_details.data);
termPQExpBuffer(&event_details);
}
INSTR_TIME_SET_CURRENT(log_status_interval_start);
@@ -502,13 +515,12 @@ monitor_streaming_primary(void)
INSTR_TIME_SET_CURRENT(local_node_unreachable_start);
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("unable to connect to local node"));
log_warning(event_details.data);
log_warning("%s", event_details.data);
node_status = NODE_STATUS_UNKNOWN;
@@ -540,7 +552,7 @@ monitor_streaming_primary(void)
appendPQExpBuffer(&event_details,
_("reconnected to local node after %i seconds"),
(int)local_node_unreachable_elapsed);
log_notice(event_details.data);
log_notice("%s", event_details.data);
create_event_record(local_conn,
&config_file_options,
@@ -603,19 +615,26 @@ monitor_streaming_standby(void)
/* Log startup event */
if (startup_event_logged == false)
{
PQExpBufferData event_details;
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("monitoring upstream node \"%s\" (node ID: %i)"),
upstream_node_info.node_name,
upstream_node_info.node_id);
create_event_record(upstream_conn,
&config_file_options,
config_file_options.node_id,
"repmgrd_start",
true,
NULL);
event_details.data);
startup_event_logged = true;
log_notice(_("repmgrd on node \"%s\" (node ID: %i) monitoring upstream node \"%s\" (node ID: %i)"),
local_node_info.node_name,
local_node_info.node_id,
upstream_node_info.node_name,
upstream_node_info.node_id);
log_notice("%s", event_details.data);
termPQExpBuffer(&event_details);
}
INSTR_TIME_SET_CURRENT(log_status_interval_start);
@@ -647,6 +666,8 @@ monitor_streaming_standby(void)
{
/* attempt to initiate voting process */
ElectionResult election_result = do_election();
/* XXX add pre-event notification here */
failover_state = FAILOVER_STATE_UNKNOWN;
log_debug("election result: %s", _print_election_result(election_result));
@@ -673,7 +694,10 @@ monitor_streaming_standby(void)
best_candidate = poll_best_candidate(&standby_nodes);
/* this can occur in a tie-break situation, after we establish this node has priority*/
/*
* this can occur in a tie-break situation, where this node establishes
* it is the best candidate
*/
if (best_candidate->node_id == local_node_info.node_id)
{
log_notice("I am the best candidate, will now promote self and inform other nodes");
@@ -687,25 +711,36 @@ monitor_streaming_standby(void)
log_info("node %i is the best candidate, waiting for it to confirm so I can follow it",
best_candidate->node_id);
// notify candidate
/* notify the best candidate so it */
// XXX check result
candidate_conn = establish_db_connection(best_candidate->conninfo, false);
notify_follow_primary(candidate_conn, best_candidate->node_id);
if (PQstatus(candidate_conn) == CONNECTION_OK)
{
notify_follow_primary(candidate_conn, best_candidate->node_id);
/* we'll wait for the candidate to get back to us */
failover_state = FAILOVER_STATE_WAITING_NEW_PRIMARY;
}
else
{
log_error(_("unable to connect to candidate node (ID: %i)"), best_candidate->node_id);
failover_state = FAILOVER_STATE_NODE_NOTIFICATION_ERROR;
}
PQfinish(candidate_conn);
// we'll wait for the candidate to get back to us
failover_state = FAILOVER_STATE_WAITING_NEW_PRIMARY;
}
}
else
{
log_info("I am a follower and am waiting to be informed by the winner");
log_info(_("follower node awaiting notification from the candidate node"));
failover_state = FAILOVER_STATE_WAITING_NEW_PRIMARY;
}
/*
* node has decided it is a follower, so will await notification
* from the candidate that it has promoted itself and can be followed
*/
if (failover_state == FAILOVER_STATE_WAITING_NEW_PRIMARY)
{
int new_primary_id;
@@ -715,10 +750,15 @@ monitor_streaming_standby(void)
/* either follow or time out; either way resume monitoring */
if (wait_primary_notification(&new_primary_id) == true)
{
// if new_primary_id is self, promote
if (new_primary_id == local_node_info.node_id)
/* if primary has reappeared, no action needed */
if (new_primary_id == upstream_node_info.node_id)
{
log_notice("looks like I'm the promotion candidate, promoting");
failover_state = FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY;
}
/* if new_primary_id is self, promote */
else if (new_primary_id == local_node_info.node_id)
{
log_notice(_("this node is promotion candidate, promoting"));
failover_state = promote_self();
@@ -744,9 +784,11 @@ monitor_streaming_standby(void)
switch(failover_state)
{
case FAILOVER_STATE_PROMOTED:
/* inform former siblings that we are Number 1 */
log_debug("failover state is PROMOTED");
/* notify former siblings that they should now follow this node */
notify_followers(&standby_nodes, local_node_info.node_id);
notify_followers(&standby_nodes);
/* we no longer care about our former siblings */
clear_node_info_list(&standby_nodes);
@@ -755,19 +797,50 @@ monitor_streaming_standby(void)
failover_state = FAILOVER_STATE_NONE;
return;
case FAILOVER_STATE_PRIMARY_REAPPEARED:
log_debug("failover state is PRIMARY_REAPPEARED");
/* notify siblings that they should resume following the original primary */
notify_followers(&standby_nodes, upstream_node_info.node_id);
/* we no longer care about our former siblings */
clear_node_info_list(&standby_nodes);
/* pass control back down to start_monitoring() */
log_info(_("resuming standby monitoring mode"));
log_detail(_("original primary \"%s\" (node ID: %i) reappeared"),
upstream_node_info.node_name, upstream_node_info.node_id);
failover_state = FAILOVER_STATE_NONE;
return;
case FAILOVER_STATE_PROMOTION_FAILED:
log_debug("failover state is FAILED");
log_debug("failover state is PROMOTION FAILED");
break;
case FAILOVER_STATE_FOLLOWED_NEW_PRIMARY:
log_info(_("resuming standby monitoring mode"));
log_detail(_("following new primary \"%s\" (node id: %i)"),
upstream_node_info.node_name, upstream_node_info.node_id);
failover_state = FAILOVER_STATE_NONE;
break;
return;
case FAILOVER_STATE_FOLLOWING_ORIGINAL_PRIMARY:
log_info(_("resuming standby monitoring mode"));
log_detail(_("following original primary \"%s\" (node id: %i)"),
upstream_node_info.node_name, upstream_node_info.node_id);
failover_state = FAILOVER_STATE_NONE;
return;
case FAILOVER_STATE_NO_NEW_PRIMARY:
case FAILOVER_STATE_WAITING_NEW_PRIMARY:
/* pass control back down to start_monitoring() */
// -> should kick off new election
return;
case FAILOVER_STATE_PRIMARY_REAPPEARED:
case FAILOVER_STATE_LOCAL_NODE_FAILURE:
case FAILOVER_STATE_UNKNOWN:
case FAILOVER_STATE_NONE:
@@ -799,6 +872,7 @@ monitor_streaming_standby(void)
upstream_node_info.node_name,
upstream_node_info.node_id);
//log_debug(
INSTR_TIME_SET_CURRENT(log_status_interval_start);
}
}
@@ -817,6 +891,18 @@ promote_self(void)
t_node_info failed_primary = T_NODE_INFO_INITIALIZER;
RecordStatus record_status;
/*
* optionally add a delay before promoting the standby; this is mainly
* useful for testing (e.g. for reappearance of the original primary)
* and is not documented.
*/
if (config_file_options.promote_delay > 0)
{
log_debug("sleeping %i seconds before promoting standby",
config_file_options.promote_delay);
sleep(config_file_options.promote_delay);
}
// XXX check success
record_status = get_node_record(local_conn, local_node_info.upstream_node_id, &failed_primary);
@@ -859,12 +945,26 @@ promote_self(void)
if (PQstatus(primary_conn) == CONNECTION_OK && primary_node_id == failed_primary.node_id)
{
log_notice(_("original primary reappeared before this standby was promoted - no action taken"));
log_notice(_("original primary (id: %i) reappeared before this standby was promoted - no action taken"),
failed_primary.node_id);
/* XXX log an event here? */
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
_("original primary \"%s\" (node ID: %i) reappeared"),
failed_primary.node_name,
failed_primary.node_id);
PQfinish(primary_conn);
primary_conn = NULL;
create_event_record(primary_conn,
&config_file_options,
local_node_info.node_id,
"repmgrd_failover_abort",
true,
event_details.data);
termPQExpBuffer(&event_details);
//PQfinish(primary_conn);
//primary_conn = NULL;
// XXX handle this!
// -> we'll need to let the other nodes know too....
@@ -885,36 +985,46 @@ promote_self(void)
/* update own internal node record */
record_status = get_node_record(local_conn, local_node_info.node_id, &local_node_info);
// XXX we're assuming the promote command updated metadata
/*
* XXX here we're assuming the promote command updated metadata
*/
appendPQExpBuffer(&event_details,
_("node %i promoted to primary; old primary %i marked as failed"),
local_node_info.node_id,
failed_primary.node_id);
/* my_local_conn is now the master */
/* local_conn is now the primary connection */
create_event_record(local_conn,
&config_file_options,
local_node_info.node_id,
"repmgrd_failover_promote",
true,
event_details.data);
termPQExpBuffer(&event_details);
return FAILOVER_STATE_PROMOTED;
}
/*
* Notify follower nodes about which node to follow. Normally this
* will be the current node, however if the original primary reappeared
* before this node could be promoted, we'll inform the followers they
* should resume monitoring the original primary.
*/
static void
notify_followers(NodeInfoList *standby_nodes)
notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
{
NodeInfoListCell *cell;
log_debug("notify_followers()");
for (cell = standby_nodes->head; cell; cell = cell->next)
{
log_debug("intending to notify %i... ", cell->node_info->node_id);
log_debug("intending to notify node %i... ", cell->node_info->node_id);
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
log_debug("connection to %i lost... ", cell->node_info->node_id);
log_debug("reconnecting to node %i... ", cell->node_info->node_id);
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
}
@@ -925,8 +1035,10 @@ notify_followers(NodeInfoList *standby_nodes)
continue;
}
log_debug("notifying node %i to follow new primary", cell->node_info->node_id);
notify_follow_primary(cell->node_info->conn, local_node_info.node_id);
log_debug("notifying node %i to follow node %i",
cell->node_info->node_id, follow_node_id);
notify_follow_primary(cell->node_info->conn, follow_node_id);
}
}
@@ -1333,6 +1445,20 @@ do_election(void)
}
static void
reset_node_voting_status(void)
{
failover_state = FAILOVER_STATE_NONE;
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_error(_("reset_node_voting_status(): local_conn not set"));
return;
}
reset_voting_status(local_conn);
}
static void
daemonize_process(void)
{