repmgrd: restrict BDR monitoring to two-node setups

It's not safe to use this kind of "failover" with more than two
nodes, so there is no need to select an alternative node by
priority; the "bdr_monitoring_mode" setting is removed accordingly.
This commit is contained in:
Ian Barwick
2017-07-14 12:56:11 +09:00
parent 91c7013024
commit e3b3fb65f0
12 changed files with 96 additions and 212 deletions

View File

@@ -435,23 +435,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
else if (strcmp(name, "event_notifications") == 0) else if (strcmp(name, "event_notifications") == 0)
parse_event_notifications_list(options, value); parse_event_notifications_list(options, value);
/* bdr settings */
else if (strcmp(name, "bdr_monitoring_mode") == 0)
{
if (strncmp(value, "local", MAXLEN) == 0)
{
options->bdr_monitoring_mode = BDR_MONITORING_LOCAL;
}
else if (strcmp(value, "highest_priority") == 0)
{
options->bdr_monitoring_mode = BDR_MONITORING_PRIORITY;
}
else
{
item_list_append(error_list, _("value for 'bdr_monitoring_mode' must be 'local' or 'highest_priority'\n"));
}
}
/* barman settings */ /* barman settings */
else if (strcmp(name, "barman_host") == 0) else if (strcmp(name, "barman_host") == 0)
strncpy(options->barman_host, value, MAXLEN); strncpy(options->barman_host, value, MAXLEN);

View File

@@ -97,9 +97,6 @@ typedef struct
char event_notification_command[MAXLEN]; char event_notification_command[MAXLEN];
EventNotificationList event_notifications; EventNotificationList event_notifications;
/* bdr settings */
int bdr_monitoring_mode;
/* barman settings */ /* barman settings */
char barman_host[MAXLEN]; char barman_host[MAXLEN];
char barman_server[MAXLEN]; char barman_server[MAXLEN];
@@ -132,8 +129,6 @@ typedef struct
"", "", "", "", "", "", \ "", "", "", "", "", "", \
/* event notification settings */ \ /* event notification settings */ \
"", { NULL, NULL }, \ "", { NULL, NULL }, \
/* bdr settings */ \
BDR_MONITORING_LOCAL, \
/* barman settings */ \ /* barman settings */ \
"", "", "", \ "", "", "", \
/* undocumented test settings */ \ /* undocumented test settings */ \

View File

@@ -14,6 +14,11 @@
#include "catalog/pg_control.h" #include "catalog/pg_control.h"
/* mainly for use by repmgrd */
int server_version_num = 0;
static PGconn *_establish_db_connection(const char *conninfo, static PGconn *_establish_db_connection(const char *conninfo,
const bool exit_on_error, const bool exit_on_error,
const bool log_notice, const bool log_notice,
@@ -1501,8 +1506,6 @@ get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id,
PQExpBufferData query; PQExpBufferData query;
PGresult *res; PGresult *res;
clear_node_info_list(node_list);
initPQExpBuffer(&query); initPQExpBuffer(&query);
appendPQExpBuffer(&query, appendPQExpBuffer(&query,
@@ -1535,8 +1538,6 @@ get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list)
PQExpBufferData query; PQExpBufferData query;
PGresult *res; PGresult *res;
clear_node_info_list(node_list);
initPQExpBuffer(&query); initPQExpBuffer(&query);
appendPQExpBuffer( appendPQExpBuffer(
@@ -1696,7 +1697,7 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)
appendPQExpBuffer( appendPQExpBuffer(
&query, &query,
"UPDATE repmgr.nodes SET active = %s " "UPDATE repmgr.nodes SET active = %s "
" WHERE id = %i", " WHERE node_id = %i",
active == true ? "TRUE" : "FALSE", active == true ? "TRUE" : "FALSE",
this_node_id); this_node_id);
@@ -1878,7 +1879,7 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options)
"UPDATE repmgr.nodes " "UPDATE repmgr.nodes "
" SET conninfo = '%s', " " SET conninfo = '%s', "
" priority = %d " " priority = %d "
" WHERE id = %d ", " WHERE node_id = %d ",
options->conninfo, options->conninfo,
options->priority, options->priority,
options->node_id); options->node_id);
@@ -1929,14 +1930,13 @@ delete_node_record(PGconn *conn, int node)
} }
void void
clear_node_info_list(NodeInfoList *nodes) clear_node_info_list(NodeInfoList *nodes)
{ {
NodeInfoListCell *cell; NodeInfoListCell *cell;
NodeInfoListCell *next_cell; NodeInfoListCell *next_cell;
log_debug("clear_node_info_list() - closing open connections"); log_verbose(LOG_DEBUG, "clear_node_info_list() - closing open connections");
/* close any open connections */ /* close any open connections */
for (cell = nodes->head; cell; cell = cell->next) for (cell = nodes->head; cell; cell = cell->next)
@@ -1948,7 +1948,7 @@ clear_node_info_list(NodeInfoList *nodes)
} }
} }
log_debug("clear_node_info_list() - unlinking"); log_verbose(LOG_DEBUG, "clear_node_info_list() - unlinking");
cell = nodes->head; cell = nodes->head;
@@ -1959,6 +1959,7 @@ clear_node_info_list(NodeInfoList *nodes)
pfree(cell); pfree(cell);
cell = next_cell; cell = next_cell;
} }
nodes->head = NULL; nodes->head = NULL;
nodes->tail = NULL; nodes->tail = NULL;
nodes->node_count = 0; nodes->node_count = 0;
@@ -3134,9 +3135,7 @@ void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list)
{ {
int i; int i;
node_list->head = NULL; clear_node_info_list(node_list);
node_list->tail = NULL;
node_list->node_count = 0;
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {

View File

@@ -186,6 +186,9 @@ typedef struct BdrNodeInfoList
0 \ 0 \
} }
extern int server_version_num;
/* utility functions */ /* utility functions */
XLogRecPtr parse_lsn(const char *str); XLogRecPtr parse_lsn(const char *str);

View File

@@ -1467,12 +1467,12 @@ create_repmgr_extension(PGconn *conn)
int int
check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string) check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
{ {
int server_version_num = 0; int conn_server_version_num = 0;
server_version_num = get_server_version(conn, server_version_string); conn_server_version_num = get_server_version(conn, server_version_string);
if (server_version_num < MIN_SUPPORTED_VERSION_NUM) if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
{ {
if (server_version_num > 0) if (conn_server_version_num > 0)
log_error(_("%s requires %s to be PostgreSQL %s or later"), log_error(_("%s requires %s to be PostgreSQL %s or later"),
progname(), progname(),
server_type, server_type,
@@ -1488,7 +1488,7 @@ check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *
return -1; return -1;
} }
return server_version_num; return conn_server_version_num;
} }

View File

@@ -13,7 +13,7 @@
# repmgr and repmgrd require the following items to be configured. # repmgr and repmgrd require the following items to be configured.
#node_id=1 # A unique integer greater than zero #node_id= # A unique integer greater than zero
#node_name='' # An arbitrary (but unique) string; we recommend #node_name='' # An arbitrary (but unique) string; we recommend
# using the server's hostname or another identifier # using the server's hostname or another identifier
# unambiguously associated with the server to avoid # unambiguously associated with the server to avoid
@@ -28,6 +28,7 @@
# #
# For details on conninfo strings, see: # For details on conninfo strings, see:
# https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING # https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
#
# If repmgrd is in use, consider explicitly setting # If repmgrd is in use, consider explicitly setting
# "connect_timeout" in the conninfo string to determine # "connect_timeout" in the conninfo string to determine
# the length of time which elapses before a network # the length of time which elapses before a network
@@ -44,8 +45,9 @@
# Replication settings # Replication settings
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
#replication_type=physical # Must be one of 'physical' or 'bdr'
#upstream_node_id=1 # When using cascading replication, a standby #upstream_node_id= # When using cascading replication, a standby
# can connect to another upstream standby node # can connect to another upstream standby node
# which is specified by setting 'upstream_node_id'. # which is specified by setting 'upstream_node_id'.
# In that case, the upstream node must exist # In that case, the upstream node must exist

View File

@@ -30,8 +30,6 @@
#define REPLICATION_TYPE_PHYSICAL 1 #define REPLICATION_TYPE_PHYSICAL 1
#define REPLICATION_TYPE_BDR 2 #define REPLICATION_TYPE_BDR 2
#define BDR_MONITORING_LOCAL 1
#define BDR_MONITORING_PRIORITY 2
#define DEFAULT_LOCATION "default" #define DEFAULT_LOCATION "default"
#define DEFAULT_PRIORITY 100 #define DEFAULT_PRIORITY 100
@@ -52,4 +50,5 @@
#define ERRBUFF_SIZE 512 #define ERRBUFF_SIZE 512
#endif /* _REPMGR_H_ */ #endif /* _REPMGR_H_ */

View File

@@ -14,6 +14,9 @@
static volatile sig_atomic_t got_SIGHUP = false; static volatile sig_atomic_t got_SIGHUP = false;
static void do_bdr_failover(NodeInfoList *nodes);
void void
do_bdr_node_check(void) do_bdr_node_check(void)
{ {
@@ -25,12 +28,8 @@ void
monitor_bdr(void) monitor_bdr(void)
{ {
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
PGconn *monitoring_conn = NULL;
t_node_info *monitored_node = NULL;
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER; t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
RecordStatus record_status; RecordStatus record_status;
bool failover_done = false;
/* sanity check local database */ /* sanity check local database */
log_info(_("connecting to local database '%s'"), log_info(_("connecting to local database '%s'"),
@@ -118,7 +117,6 @@ monitor_bdr(void)
} }
/* Log startup event */ /* Log startup event */
create_event_record(local_conn, create_event_record(local_conn,
&config_file_options, &config_file_options,
config_file_options.node_id, config_file_options.node_id,
@@ -128,158 +126,72 @@ monitor_bdr(void)
/* /*
* retrieve list of nodes - we'll need these if the DB connection goes away, * retrieve list of nodes - we'll need these if the DB connection goes away,
* or if we're monitoring a non-local node
*/ */
get_node_records_by_priority(local_conn, &nodes); get_all_node_records(local_conn, &nodes);
/* decided which node to monitor */ log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_LOCAL) log_info(_("starting continuous bdr node monitoring"));
{
// if local, reuse local_conn and node info
//record_status = get_node_record(local_conn, config_file_options.node_id, &monitored_node);
monitored_node = &local_node_info;
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
}
else
{
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("main_loop_bdr() checking node %s %i", cell->node_info->node_name, cell->node_info->priority);
monitoring_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
{
log_debug("main_loop_bdr() monitoring node '%s' (ID %i, priority %i)",
cell->node_info->node_name, cell->node_info->node_id, cell->node_info->priority);
/* fetch the record again, as the node list is transient */
monitored_node = get_node_record_pointer(monitoring_conn, cell->node_info->node_id);
break;
}
}
}
// check monitored_node not null!
while (true) while (true)
{ {
/* normal state - connection active */
if (PQstatus(monitoring_conn) == CONNECTION_OK) /* monitoring loop */
log_verbose(LOG_DEBUG, "bdr check loop...");
switch (monitoring_state)
{ {
// XXX detail case MS_NORMAL:
log_info(_("starting continuous bdr node monitoring"));
/* monitoring loop */
do
{ {
log_verbose(LOG_DEBUG, "bdr check loop..."); if (is_server_available(local_node_info.conninfo) == false)
{ {
NodeInfoListCell *cell;
for (cell = nodes.head; cell; cell = cell->next)
{
log_debug("bdr_monitor() %s", cell->node_info->node_name);
}
}
if (is_server_available(monitored_node->conninfo) == false)
{
t_node_info *new_monitored_node;
// XXX improve // XXX improve
log_warning("connection problem!"); log_warning("connection problem!");
new_monitored_node = do_bdr_failover(&nodes, monitored_node); do_bdr_failover(&nodes);
if (new_monitored_node != NULL)
{
pfree(monitored_node);
monitored_node = new_monitored_node;
}
log_notice(_("monitored_node->node_name is now '%s' \n"), monitored_node->node_name);
} }
else else
{ {
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs); sleep(config_file_options.monitor_interval_secs);
} }
}
if (got_SIGHUP) case MS_DEGRADED:
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
} while (!failover_done);
}
/* local connection inactive - periodically try and connect */
/* TODO: make this an option */
else
{
monitoring_conn = establish_db_connection(monitored_node->conninfo, false);
if (PQstatus(monitoring_conn) == CONNECTION_OK)
{ {
// XXX event bdr_node_recovered -> if monitored == local node /* degraded monitoring */
if (is_server_available(local_node_info.conninfo) == true)
if (monitored_node->node_id == config_file_options.node_id)
{ {
log_notice(_("local connection has returned, resuming monitoring")); log_notice(_("monitored node %i has recovered"), local_node_info.node_id);
// do_bdr_recovery()
} }
else else
{ {
log_notice(_("connection to '%s' has returned, resuming monitoring"), monitored_node->node_name); log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
} }
} }
else
{
sleep(config_file_options.monitor_interval_secs);
}
if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
if (PQstatus(local_conn) == CONNECTION_OK)
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
}
/* reload node list */
if (PQstatus(local_conn) == CONNECTION_OK)
get_node_records_by_priority(local_conn, &nodes);
got_SIGHUP = false;
}
} }
failover_done = false; if (got_SIGHUP)
{
/*
* if we can reload, then could need to change
* local_conn
*/
if (reload_config(&config_file_options))
{
PQfinish(local_conn);
local_conn = establish_db_connection(config_file_options.conninfo, true);
update_registration(local_conn);
}
/* reload node list */
get_all_node_records(local_conn, &nodes);
got_SIGHUP = false;
}
} }
return; return;
@@ -294,43 +206,44 @@ monitor_bdr(void)
* we'll do the following: * we'll do the following:
* *
* - attempt to find another node, to set our node record as inactive * - attempt to find another node, to set our node record as inactive
* (there should be only one other node)
* - generate an event log record on that node * - generate an event log record on that node
* - optionally execute `bdr_failover_command`, passing the conninfo string * - optionally execute `bdr_failover_command`, passing the conninfo string
* of that node to the command; this can be used for e.g. reconfiguring * of that node to the command; this can be used for e.g. reconfiguring
* pgbouncer. * pgbouncer.
* - if mode is 'BDR_MONITORING_PRIORITY', redirect monitoring to that node.
* *
*/ */
t_node_info *
do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node) void
do_bdr_failover(NodeInfoList *nodes)
{ {
PGconn *next_node_conn = NULL; PGconn *next_node_conn = NULL;
NodeInfoListCell *cell; NodeInfoListCell *cell;
bool failover_success = false; bool failover_success = false;
PQExpBufferData event_details; PQExpBufferData event_details;
RecordStatus record_status;
t_event_info event_info = T_EVENT_INFO_INITIALIZER; t_event_info event_info = T_EVENT_INFO_INITIALIZER;
t_node_info *new_monitored_node = NULL; t_node_info target_node = T_NODE_INFO_INITIALIZER;
initPQExpBuffer(&event_details); initPQExpBuffer(&event_details);
/* get next active priority node */ /* get next active node */
for (cell = nodes->head; cell; cell = cell->next) for (cell = nodes->head; cell; cell = cell->next)
{ {
log_debug("do_bdr_failover() %s", cell->node_info->node_name); log_debug("do_bdr_failover() %s", cell->node_info->node_name);
/* don't attempt to connect to the current monitored node, as that's the one which has failed */ /* don't attempt to connect to the current monitored node, as that's the one which has failed */
if (cell->node_info->node_id == monitored_node->node_id) if (cell->node_info->node_id == local_node_info.node_id)
continue; continue;
/* XXX skip inactive node? */ /* XXX skip inactive node? */
next_node_conn = establish_db_connection(cell->node_info->conninfo, false); next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
if (PQstatus(next_node_conn) == CONNECTION_OK) if (PQstatus(next_node_conn) == CONNECTION_OK)
{ {
// XXX check if record returned // XXX check if record returned
new_monitored_node = get_node_record_pointer(next_node_conn, cell->node_info->node_id); record_status = get_node_record(next_node_conn, cell->node_info->node_id, &target_node);
break; break;
} }
@@ -345,40 +258,34 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
log_error("%s", event_details.data); log_error("%s", event_details.data);
// no other nodes found // no other nodes found
// continue degraded monitoring until node is restored? // continue degraded monitoring until node is restored?
} }
else else
{ {
log_info(_("connecting to target node %s"), cell->node_info->node_name); log_info(_("connecting to target node %s"), target_node.node_name);
failover_success = true; failover_success = true;
event_info.conninfo_str = cell->node_info->conninfo; event_info.conninfo_str = target_node.conninfo;
event_info.node_name = cell->node_info->node_name; event_info.node_name = target_node.node_name;
/* update our own record on the other node */ /* update our own record on the other node */
if (monitored_node->node_id == config_file_options.node_id) update_node_record_set_active(next_node_conn, local_node_info.node_id, false);
{
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
}
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
{
log_notice(_("monitoring next available node by prioriy: %s (ID %i)"),
new_monitored_node->node_name,
new_monitored_node->node_id);
}
appendPQExpBuffer(&event_details, appendPQExpBuffer(&event_details,
_("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"), _("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"),
monitored_node->node_name, local_node_info.node_name,
monitored_node->node_id, local_node_info.node_id,
cell->node_info->node_name, target_node.node_name,
cell->node_info->node_id); target_node.node_id);
} }
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
// check here that the node hasn't come back up...
/* /*
* Create an event record * Create an event record
* *
@@ -400,11 +307,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
termPQExpBuffer(&event_details); termPQExpBuffer(&event_details);
//failover_done = true;
if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY)
return new_monitored_node;
/* local monitoring mode - there's no new node to monitor */ /* local monitoring mode - there's no new node to monitor */
return NULL; return;
} }

View File

@@ -8,6 +8,5 @@
extern void do_bdr_node_check(void); extern void do_bdr_node_check(void);
extern void monitor_bdr(void); extern void monitor_bdr(void);
extern t_node_info *do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node);
#endif /* _REPMGRD_BDR_H_ */ #endif /* _REPMGRD_BDR_H_ */

View File

@@ -325,7 +325,11 @@ monitor_streaming_primary(void)
INSTR_TIME_SET_CURRENT(log_status_interval_start); INSTR_TIME_SET_CURRENT(log_status_interval_start);
} }
} }
sleep(1);
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
config_file_options.monitor_interval_secs);
sleep(config_file_options.monitor_interval_secs);
} }
} }

View File

@@ -37,7 +37,6 @@ PGconn *local_conn = NULL;
/* Collate command line errors here for friendlier reporting */ /* Collate command line errors here for friendlier reporting */
static ItemList cli_errors = { NULL, NULL }; static ItemList cli_errors = { NULL, NULL };
int server_version_num = 0;
bool startup_event_logged = false; bool startup_event_logged = false;
MonitoringState monitoring_state = MS_NORMAL; MonitoringState monitoring_state = MS_NORMAL;
@@ -57,7 +56,6 @@ static void show_usage(void);
static void daemonize_process(void); static void daemonize_process(void);
static void check_and_create_pid_file(const char *pid_file); static void check_and_create_pid_file(const char *pid_file);
static void start_monitoring(void); static void start_monitoring(void);

View File

@@ -27,7 +27,6 @@ extern t_configuration_options config_file_options;
extern t_node_info local_node_info; extern t_node_info local_node_info;
extern PGconn *local_conn; extern PGconn *local_conn;
extern bool startup_event_logged; extern bool startup_event_logged;
extern int server_version_num;
PGconn *try_reconnect(const char *conninfo, NodeStatus *node_status); PGconn *try_reconnect(const char *conninfo, NodeStatus *node_status);