repmgrd: restrict BDR monitoring to two node setup

It's not safe to have more than two nodes with this kind of
"failover", so we don't need to select alternative nodes by
priority.
This commit is contained in:
Ian Barwick
2017-07-14 12:56:11 +09:00
parent 91c7013024
commit e3b3fb65f0
12 changed files with 96 additions and 212 deletions

View File

@@ -14,6 +14,9 @@
static volatile sig_atomic_t got_SIGHUP = false;
static void do_bdr_failover(NodeInfoList *nodes);
void
do_bdr_node_check(void)
{
@@ -25,12 +28,8 @@ void
monitor_bdr(void)
{
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
PGconn *monitoring_conn = NULL;
t_node_info *monitored_node = NULL;
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
RecordStatus record_status;
bool failover_done = false;
/* sanity check local database */
log_info(_("connecting to local database '%s'"),
@@ -118,7 +117,6 @@ monitor_bdr(void)
}
/* Log startup event */
create_event_record(local_conn,
&config_file_options,
config_file_options.node_id,
@@ -128,158 +126,72 @@ monitor_bdr(void)
/*
* retrieve list of nodes - we'll need these if the DB connection goes away,
* or if we're monitoring a non-local node
*/
get_node_records_by_priority(local_conn, &nodes);
get_all_node_records(local_conn, &nodes);
/* decided which node to monitor */
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_LOCAL)
{
// if local, reuse local_conn and node info
//record_status = get_node_record(local_conn, config_file_options.node_id, &monitored_node);
monitored_node = &local_node_info;
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
}
else
{
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("main_loop_bdr() checking node %s %i", cell->node_info->node_name, cell->node_info->priority);
monitoring_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
{
log_debug("main_loop_bdr() monitoring node '%s' (ID %i, priority %i)",
cell->node_info->node_name, cell->node_info->node_id, cell->node_info->priority);
/* fetch the record again, as the node list is transient */
monitored_node = get_node_record_pointer(monitoring_conn, cell->node_info->node_id);
break;
}
}
}
// check monitored_node not null!
log_info(_("starting continuous bdr node monitoring"));
while (true)
{
/* normal state - connection active */
if (PQstatus(monitoring_conn) == CONNECTION_OK)
/* monitoring loop */
log_verbose(LOG_DEBUG, "bdr check loop...");
switch (monitoring_state)
{
// XXX detail
log_info(_("starting continuous bdr node monitoring"));
/* monitoring loop */
do
case MS_NORMAL:
{
log_verbose(LOG_DEBUG, "bdr check loop...");
if (is_server_available(local_node_info.conninfo) == false)
{
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("bdr_monitor() %s", cell->node_info->node_name);
}
}
if (is_server_available(monitored_node->conninfo) == false)
{
t_node_info *new_monitored_node;
// XXX improve
log_warning("connection problem!");
new_monitored_node = do_bdr_failover(&nodes, monitored_node);
if (new_monitored_node != NULL)
{
pfree(monitored_node);
monitored_node = new_monitored_node;
}
log_notice(_("monitored_node->node_name is now '%s' \n"), monitored_node->node_name);
do_bdr_failover(&nodes);
}
else
{
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
} while (!failover_done);
}
/* local connection inactive - periodically try and connect */
/* TODO: make this an option */
else
{
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
}
case MS_DEGRADED:
{
// XXX event bdr_node_recovered -> if monitored == local node
if (monitored_node->node_id == config_file_options.node_id)
/* degraded monitoring */
if (is_server_available(local_node_info.conninfo) == true)
{
log_notice(_("local connection has returned, resuming monitoring"));
log_notice(_("monitored node %i has recovered"), local_node_info.node_id);
// do_bdr_recovery()
}
else
{
log_notice(_("connection to '%s' has returned, resuming monitoring"), monitored_node->node_name);
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
}
else
{
sleep(config_file_options.monitor_interval_secs);
}
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
if (PQstatus(local_conn) == CONNECTION_OK)
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
}
/* reload node list */
if (PQstatus(local_conn) == CONNECTION_OK)
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
}
failover_done = false;
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_all_node_records(local_conn, &nodes);
got_SIGHUP = false;
}
}
return;
@@ -294,43 +206,44 @@ monitor_bdr(void)
* we'll do the following:
*
* - attempt to find another node, to set our node record as inactive
* (there should be only one other node)
* - generate an event log record on that node
* - optionally execute `bdr_failover_command`, passing the conninfo string
* of that node to the command; this can be used for e.g. reconfiguring
* pgbouncer.
* - if mode is 'BDR_MONITORING_PRIORITY', redirect monitoring to that node.
*
*/
t_node_info *
do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
void
do_bdr_failover(NodeInfoList *nodes)
{
PGconn *next_node_conn = NULL;
NodeInfoListCell *cell;
bool failover_success = false;
PQExpBufferData event_details;
RecordStatus record_status;
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
t_node_info *new_monitored_node = NULL;
t_node_info target_node = T_NODE_INFO_INITIALIZER;
initPQExpBuffer(&event_details);
/* get next active priority node */
/* get next active node */
for (cell = nodes->head; cell; cell = cell->next)
{
log_debug("do_bdr_failover() %s", cell->node_info->node_name);
/* don't attempt to connect to the current monitored node, as that's the one which has failed */
if (cell->node_info->node_id == monitored_node->node_id)
if (cell->node_info->node_id == local_node_info.node_id)
continue;
/* XXX skip inactive node? */
next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(next_node_conn) == CONNECTION_OK)
{
// XXX check if record returned
new_monitored_node = get_node_record_pointer(next_node_conn, cell->node_info->node_id);
record_status = get_node_record(next_node_conn, cell->node_info->node_id, &target_node);
break;
}
@@ -345,40 +258,34 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
log_error("%s", event_details.data);
// no other nodes found
// continue degraded monitoring until node is restored?
}
else
{
log_info(_("connecting to target node %s"), cell->node_info->node_name);
log_info(_("connecting to target node %s"), target_node.node_name);
failover_success = true;
event_info.conninfo_str = cell->node_info->conninfo;
event_info.node_name = cell->node_info->node_name;
event_info.conninfo_str = target_node.conninfo;
event_info.node_name = target_node.node_name;
/* update our own record on the other node */
if (monitored_node->node_id == config_file_options.node_id)
{
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
}
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
{
log_notice(_("monitoring next available node by prioriy: %s (ID %i)"),
new_monitored_node->node_name,
new_monitored_node->node_id);
}
update_node_record_set_active(next_node_conn, local_node_info.node_id, false);
appendPQExpBuffer(&event_details,
_("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"),
monitored_node->node_name,
monitored_node->node_id,
cell->node_info->node_name,
cell->node_info->node_id);
local_node_info.node_name,
local_node_info.node_id,
target_node.node_name,
target_node.node_id);
}
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
// check here that the node hasn't come back up...
/*
* Create an event record
*
@@ -400,11 +307,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
termPQExpBuffer(&event_details);
//failover_done = true;
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
return new_monitored_node;
/* local monitoring mode - there's no new node to monitor */
return NULL;
return;
}