repmgrd: restrict BDR monitoring to two node setup

It's not safe to have more than two nodes with this kind of
"failover", so we don't need to select alternative nodes by
priority.
This commit is contained in:
Ian Barwick
2017-07-14 12:56:11 +09:00
parent 91c7013024
commit e3b3fb65f0
12 changed files with 96 additions and 212 deletions

View File

@@ -435,23 +435,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
else if (strcmp(name, "event_notifications") == 0)
parse_event_notifications_list(options, value);
/* bdr settings */
else if (strcmp(name, "bdr_monitoring_mode") == 0)
{
if (strncmp(value, "local", MAXLEN) == 0)
{
options->bdr_monitoring_mode = BDR_MONITORING_LOCAL;
}
else if (strcmp(value, "highest_priority") == 0)
{
options->bdr_monitoring_mode = BDR_MONITORING_PRIORITY;
}
else
{
item_list_append(error_list, _("value for 'bdr_monitoring_mode' must be 'local' or 'highest_priority'\n"));
}
}
/* barman settings */
else if (strcmp(name, "barman_host") == 0)
strncpy(options->barman_host, value, MAXLEN);

View File

@@ -97,9 +97,6 @@ typedef struct
char event_notification_command[MAXLEN];
EventNotificationList event_notifications;
/* bdr settings */
int bdr_monitoring_mode;
/* barman settings */
char barman_host[MAXLEN];
char barman_server[MAXLEN];
@@ -132,8 +129,6 @@ typedef struct
"", "", "", "", "", "", \
/* event notification settings */ \
"", { NULL, NULL }, \
/* bdr settings */ \
BDR_MONITORING_LOCAL, \
/* barman settings */ \
"", "", "", \
/* undocumented test settings */ \

View File

@@ -14,6 +14,11 @@
#include "catalog/pg_control.h"
/* mainly for use by repmgrd */
int server_version_num = 0;
static PGconn *_establish_db_connection(const char *conninfo,
const bool exit_on_error,
const bool log_notice,
@@ -1501,8 +1506,6 @@ get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id,
PQExpBufferData query;
PGresult *res;
clear_node_info_list(node_list);
initPQExpBuffer(&query);
appendPQExpBuffer(&query,
@@ -1535,8 +1538,6 @@ get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list)
PQExpBufferData query;
PGresult *res;
clear_node_info_list(node_list);
initPQExpBuffer(&query);
appendPQExpBuffer(
@@ -1696,7 +1697,7 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)
appendPQExpBuffer(
&query,
"UPDATE repmgr.nodes SET active = %s "
" WHERE id = %i",
" WHERE node_id = %i",
active == true ? "TRUE" : "FALSE",
this_node_id);
@@ -1878,7 +1879,7 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options)
"UPDATE repmgr.nodes "
" SET conninfo = '%s', "
" priority = %d "
" WHERE id = %d ",
" WHERE node_id = %d ",
options->conninfo,
options->priority,
options->node_id);
@@ -1929,14 +1930,13 @@ delete_node_record(PGconn *conn, int node)
}
void
clear_node_info_list(NodeInfoList *nodes)
{
NodeInfoListCell *cell;
NodeInfoListCell *next_cell;
log_debug("clear_node_info_list() - closing open connections");
log_verbose(LOG_DEBUG, "clear_node_info_list() - closing open connections");
/* close any open connections */
for (cell = nodes->head; cell; cell = cell->next)
@@ -1948,7 +1948,7 @@ clear_node_info_list(NodeInfoList *nodes)
}
}
log_debug("clear_node_info_list() - unlinking");
log_verbose(LOG_DEBUG, "clear_node_info_list() - unlinking");
cell = nodes->head;
@@ -1959,6 +1959,7 @@ clear_node_info_list(NodeInfoList *nodes)
pfree(cell);
cell = next_cell;
}
nodes->head = NULL;
nodes->tail = NULL;
nodes->node_count = 0;
@@ -3134,9 +3135,7 @@ void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list)
{
int i;
node_list->head = NULL;
node_list->tail = NULL;
node_list->node_count = 0;
clear_node_info_list(node_list);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{

View File

@@ -186,6 +186,9 @@ typedef struct BdrNodeInfoList
0 \
}
extern int server_version_num;
/* utility functions */
XLogRecPtr parse_lsn(const char *str);

View File

@@ -1467,12 +1467,12 @@ create_repmgr_extension(PGconn *conn)
int
check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
{
int server_version_num = 0;
int conn_server_version_num = 0;
server_version_num = get_server_version(conn, server_version_string);
if (server_version_num < MIN_SUPPORTED_VERSION_NUM)
conn_server_version_num = get_server_version(conn, server_version_string);
if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
{
if (server_version_num > 0)
if (conn_server_version_num > 0)
log_error(_("%s requires %s to be PostgreSQL %s or later"),
progname(),
server_type,
@@ -1488,7 +1488,7 @@ check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *
return -1;
}
return server_version_num;
return conn_server_version_num;
}

View File

@@ -13,7 +13,7 @@
# repmgr and repmgrd require the following items to be configured.
#node_id=1 # A unique integer greater than zero
#node_id= # A unique integer greater than zero
#node_name='' # An arbitrary (but unique) string; we recommend
# using the server's hostname or another identifier
# unambiguously associated with the server to avoid
@@ -28,6 +28,7 @@
#
# For details on conninfo strings, see:
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
#
# If repmgrd is in use, consider explicitly setting
# "connect_timeout" in the conninfo string to determine
# the length of time which elapses before a network
@@ -44,8 +45,9 @@
# Replication settings
#------------------------------------------------------------------------------
#replication_type=physical # Must be one of 'physical' or 'bdr'
#upstream_node_id=1 # When using cascading replication, a standby
#upstream_node_id= # When using cascading replication, a standby
# can connect to another upstream standby node
# which is specified by setting 'upstream_node_id'.
# In that case, the upstream node must exist

View File

@@ -30,8 +30,6 @@
#define REPLICATION_TYPE_PHYSICAL 1
#define REPLICATION_TYPE_BDR 2
#define BDR_MONITORING_LOCAL 1
#define BDR_MONITORING_PRIORITY 2
#define DEFAULT_LOCATION "default"
#define DEFAULT_PRIORITY 100
@@ -52,4 +50,5 @@
#define ERRBUFF_SIZE 512
#endif /* _REPMGR_H_ */

View File

@@ -14,6 +14,9 @@
static volatile sig_atomic_t got_SIGHUP = false;
static void do_bdr_failover(NodeInfoList *nodes);
void
do_bdr_node_check(void)
{
@@ -25,12 +28,8 @@ void
monitor_bdr(void)
{
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
PGconn *monitoring_conn = NULL;
t_node_info *monitored_node = NULL;
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
RecordStatus record_status;
bool failover_done = false;
/* sanity check local database */
log_info(_("connecting to local database '%s'"),
@@ -118,7 +117,6 @@ monitor_bdr(void)
}
/* Log startup event */
create_event_record(local_conn,
&config_file_options,
config_file_options.node_id,
@@ -128,158 +126,72 @@ monitor_bdr(void)
/*
* retrieve list of nodes - we'll need these if the DB connection goes away,
* or if we're monitoring a non-local node
*/
get_node_records_by_priority(local_conn, &nodes);
get_all_node_records(local_conn, &nodes);
/* decided which node to monitor */
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_LOCAL)
{
// if local, reuse local_conn and node info
//record_status = get_node_record(local_conn, config_file_options.node_id, &monitored_node);
monitored_node = &local_node_info;
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
}
else
{
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("main_loop_bdr() checking node %s %i", cell->node_info->node_name, cell->node_info->priority);
monitoring_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
{
log_debug("main_loop_bdr() monitoring node '%s' (ID %i, priority %i)",
cell->node_info->node_name, cell->node_info->node_id, cell->node_info->priority);
/* fetch the record again, as the node list is transient */
monitored_node = get_node_record_pointer(monitoring_conn, cell->node_info->node_id);
break;
}
}
}
// check monitored_node not null!
log_info(_("starting continuous bdr node monitoring"));
while (true)
{
/* normal state - connection active */
if (PQstatus(monitoring_conn) == CONNECTION_OK)
/* monitoring loop */
log_verbose(LOG_DEBUG, "bdr check loop...");
switch (monitoring_state)
{
// XXX detail
log_info(_("starting continuous bdr node monitoring"));
/* monitoring loop */
do
case MS_NORMAL:
{
log_verbose(LOG_DEBUG, "bdr check loop...");
if (is_server_available(local_node_info.conninfo) == false)
{
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("bdr_monitor() %s", cell->node_info->node_name);
}
}
if (is_server_available(monitored_node->conninfo) == false)
{
t_node_info *new_monitored_node;
// XXX improve
log_warning("connection problem!");
new_monitored_node = do_bdr_failover(&nodes, monitored_node);
if (new_monitored_node != NULL)
{
pfree(monitored_node);
monitored_node = new_monitored_node;
}
log_notice(_("monitored_node->node_name is now '%s' \n"), monitored_node->node_name);
do_bdr_failover(&nodes);
}
else
{
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
} while (!failover_done);
}
/* local connection inactive - periodically try and connect */
/* TODO: make this an option */
else
{
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
}
case MS_DEGRADED:
{
// XXX event bdr_node_recovered -> if monitored == local node
if (monitored_node->node_id == config_file_options.node_id)
/* degraded monitoring */
if (is_server_available(local_node_info.conninfo) == true)
{
log_notice(_("local connection has returned, resuming monitoring"));
log_notice(_("monitored node %i has recovered"), local_node_info.node_id);
// do_bdr_recovery()
}
else
{
log_notice(_("connection to '%s' has returned, resuming monitoring"), monitored_node->node_name);
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
}
else
{
sleep(config_file_options.monitor_interval_secs);
}
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
if (PQstatus(local_conn) == CONNECTION_OK)
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
}
/* reload node list */
if (PQstatus(local_conn) == CONNECTION_OK)
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
}
failover_done = false;
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_all_node_records(local_conn, &nodes);
got_SIGHUP = false;
}
}
return;
@@ -294,43 +206,44 @@ monitor_bdr(void)
* we'll do the following:
*
* - attempt to find another node, to set our node record as inactive
* (there should be only one other node)
* - generate an event log record on that node
* - optionally execute `bdr_failover_command`, passing the conninfo string
* of that node to the command; this can be used for e.g. reconfiguring
* pgbouncer.
* - if mode is 'BDR_MONITORING_PRIORITY', redirect monitoring to that node.
*
*/
t_node_info *
do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
void
do_bdr_failover(NodeInfoList *nodes)
{
PGconn *next_node_conn = NULL;
NodeInfoListCell *cell;
bool failover_success = false;
PQExpBufferData event_details;
RecordStatus record_status;
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
t_node_info *new_monitored_node = NULL;
t_node_info target_node = T_NODE_INFO_INITIALIZER;
initPQExpBuffer(&event_details);
/* get next active priority node */
/* get next active node */
for (cell = nodes->head; cell; cell = cell->next)
{
log_debug("do_bdr_failover() %s", cell->node_info->node_name);
/* don't attempt to connect to the current monitored node, as that's the one which has failed */
if (cell->node_info->node_id == monitored_node->node_id)
if (cell->node_info->node_id == local_node_info.node_id)
continue;
/* XXX skip inactive node? */
next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(next_node_conn) == CONNECTION_OK)
{
// XXX check if record returned
new_monitored_node = get_node_record_pointer(next_node_conn, cell->node_info->node_id);
record_status = get_node_record(next_node_conn, cell->node_info->node_id, &target_node);
break;
}
@@ -345,40 +258,34 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
log_error("%s", event_details.data);
// no other nodes found
// continue degraded monitoring until node is restored?
}
else
{
log_info(_("connecting to target node %s"), cell->node_info->node_name);
log_info(_("connecting to target node %s"), target_node.node_name);
failover_success = true;
event_info.conninfo_str = cell->node_info->conninfo;
event_info.node_name = cell->node_info->node_name;
event_info.conninfo_str = target_node.conninfo;
event_info.node_name = target_node.node_name;
/* update our own record on the other node */
if (monitored_node->node_id == config_file_options.node_id)
{
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
}
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
{
log_notice(_("monitoring next available node by prioriy: %s (ID %i)"),
new_monitored_node->node_name,
new_monitored_node->node_id);
}
update_node_record_set_active(next_node_conn, local_node_info.node_id, false);
appendPQExpBuffer(&event_details,
_("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"),
monitored_node->node_name,
monitored_node->node_id,
cell->node_info->node_name,
cell->node_info->node_id);
local_node_info.node_name,
local_node_info.node_id,
target_node.node_name,
target_node.node_id);
}
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
// check here that the node hasn't come back up...
/*
* Create an event record
*
@@ -400,11 +307,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
termPQExpBuffer(&event_details);
//failover_done = true;
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
return new_monitored_node;
/* local monitoring mode - there's no new node to monitor */
return NULL;
return;
}

View File

@@ -8,6 +8,5 @@
extern void do_bdr_node_check(void);
extern void monitor_bdr(void);
extern t_node_info *do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node);
#endif /* _REPMGRD_BDR_H_ */

View File

@@ -325,7 +325,11 @@ monitor_streaming_primary(void)
INSTR_TIME_SET_CURRENT(log_status_interval_start);
}
}
sleep(1);
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
}
}

View File

@@ -37,7 +37,6 @@ PGconn *local_conn = NULL;
/* Collate command line errors here for friendlier reporting */
static ItemList cli_errors = { NULL, NULL };
int server_version_num = 0;
bool startup_event_logged = false;
MonitoringState monitoring_state = MS_NORMAL;
@@ -57,7 +56,6 @@ static void show_usage(void);
static void daemonize_process(void);
static void check_and_create_pid_file(const char *pid_file);
static void start_monitoring(void);

View File

@@ -27,7 +27,6 @@ extern t_configuration_options config_file_options;
extern t_node_info local_node_info;
extern PGconn *local_conn;
extern bool startup_event_logged;
extern int server_version_num;
PGconn *try_reconnect(const char *conninfo, NodeStatus *node_status);