From e3b3fb65f0af46bfa5373f51a20edaf6f2f6ba7e Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 14 Jul 2017 12:56:11 +0900 Subject: [PATCH] repmgrd: restrict BDR monitoring to two node setup It's not safe to have more than two nodes with this kind of "failover", so we don't need to select alternative nodes by priority. --- config.c | 17 ---- config.h | 5 - dbutils.c | 23 +++-- dbutils.h | 3 + repmgr-client.c | 10 +- repmgr.conf.sample | 6 +- repmgr.h | 3 +- repmgrd-bdr.c | 231 +++++++++++++-------------------------------- repmgrd-bdr.h | 1 - repmgrd-physical.c | 6 +- repmgrd.c | 2 - repmgrd.h | 1 - 12 files changed, 96 insertions(+), 212 deletions(-) diff --git a/config.c b/config.c index 403d177f..7b8b25ec 100644 --- a/config.c +++ b/config.c @@ -435,23 +435,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList * else if (strcmp(name, "event_notifications") == 0) parse_event_notifications_list(options, value); - /* bdr settings */ - else if (strcmp(name, "bdr_monitoring_mode") == 0) - { - if (strncmp(value, "local", MAXLEN) == 0) - { - options->bdr_monitoring_mode = BDR_MONITORING_LOCAL; - } - else if (strcmp(value, "highest_priority") == 0) - { - options->bdr_monitoring_mode = BDR_MONITORING_PRIORITY; - } - else - { - item_list_append(error_list, _("value for 'bdr_monitoring_mode' must be 'local' or 'highest_priority'\n")); - } - } - /* barman settings */ else if (strcmp(name, "barman_host") == 0) strncpy(options->barman_host, value, MAXLEN); diff --git a/config.h b/config.h index 80c6685d..77e34edf 100644 --- a/config.h +++ b/config.h @@ -97,9 +97,6 @@ typedef struct char event_notification_command[MAXLEN]; EventNotificationList event_notifications; - /* bdr settings */ - int bdr_monitoring_mode; - /* barman settings */ char barman_host[MAXLEN]; char barman_server[MAXLEN]; @@ -132,8 +129,6 @@ typedef struct "", "", "", "", "", "", \ /* event notification settings */ \ "", { NULL, NULL }, \ - /* bdr settings */ \ - BDR_MONITORING_LOCAL, \ /* barman settings */ \ "", "", "", \ /* undocumented test settings */ \ diff --git a/dbutils.c b/dbutils.c index 8f488e5a..11ff7ac5 100644 --- a/dbutils.c +++ b/dbutils.c @@ -14,6 +14,11 @@ #include "catalog/pg_control.h" + +/* mainly for use by repmgrd */ +int server_version_num = 0; + + static PGconn *_establish_db_connection(const char *conninfo, const bool exit_on_error, const bool log_notice, @@ -1501,8 +1506,6 @@ get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, PQExpBufferData query; PGresult *res; - clear_node_info_list(node_list); - initPQExpBuffer(&query); appendPQExpBuffer(&query, @@ -1535,8 +1538,6 @@ get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list) PQExpBufferData query; PGresult *res; - clear_node_info_list(node_list); - initPQExpBuffer(&query); appendPQExpBuffer( @@ -1696,7 +1697,7 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active) appendPQExpBuffer( &query, "UPDATE repmgr.nodes SET active = %s " - " WHERE id = %i", + " WHERE node_id = %i", active == true ? "TRUE" : "FALSE", this_node_id); @@ -1878,7 +1879,7 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options) "UPDATE repmgr.nodes " " SET conninfo = '%s', " " priority = %d " - " WHERE id = %d ", + " WHERE node_id = %d ", options->conninfo, options->priority, options->node_id); @@ -1929,14 +1930,13 @@ delete_node_record(PGconn *conn, int node) } - void clear_node_info_list(NodeInfoList *nodes) { NodeInfoListCell *cell; NodeInfoListCell *next_cell; - log_debug("clear_node_info_list() - closing open connections"); + log_verbose(LOG_DEBUG, "clear_node_info_list() - closing open connections"); /* close any open connections */ for (cell = nodes->head; cell; cell = cell->next) @@ -1948,7 +1948,7 @@ clear_node_info_list(NodeInfoList *nodes) } } - log_debug("clear_node_info_list() - unlinking"); + log_verbose(LOG_DEBUG, "clear_node_info_list() - unlinking"); cell = nodes->head; @@ -1959,6 +1959,7 @@ clear_node_info_list(NodeInfoList *nodes) pfree(cell); cell = next_cell; } + nodes->head = NULL; nodes->tail = NULL; nodes->node_count = 0; @@ -3134,9 +3135,7 @@ void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list) { int i; - node_list->head = NULL; - node_list->tail = NULL; - node_list->node_count = 0; + clear_node_info_list(node_list); if (PQresultStatus(res) != PGRES_TUPLES_OK) { diff --git a/dbutils.h b/dbutils.h index 2df11eb5..1b9ede4d 100644 --- a/dbutils.h +++ b/dbutils.h @@ -186,6 +186,9 @@ typedef struct BdrNodeInfoList 0 \ } + +extern int server_version_num; + /* utility functions */ XLogRecPtr parse_lsn(const char *str); diff --git a/repmgr-client.c b/repmgr-client.c index 234f6362..ac4b2948 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -1467,12 +1467,12 @@ create_repmgr_extension(PGconn *conn) int check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string) { - int server_version_num = 0; + int conn_server_version_num = 0; - server_version_num = get_server_version(conn, server_version_string); - if (server_version_num < MIN_SUPPORTED_VERSION_NUM) + conn_server_version_num = get_server_version(conn, server_version_string); + if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM) { - if (server_version_num > 0) + if (conn_server_version_num > 0) log_error(_("%s requires %s to be PostgreSQL %s or later"), progname(), server_type, @@ -1488,7 +1488,7 @@ check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char * return -1; } - return server_version_num; + return conn_server_version_num; } diff --git a/repmgr.conf.sample b/repmgr.conf.sample index a8c890dc..217895fc 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -13,7 +13,7 @@ # repmgr and repmgrd require the following items to be configured. -#node_id=1 # A unique integer greater than zero +#node_id= # A unique integer greater than zero #node_name='' # An arbitrary (but unique) string; we recommend # using the server's hostname or another identifier # unambiguously associated with the server to avoid @@ -28,6 +28,7 @@ # # For details on conninfo strings, see: # https://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING + # # If repmgrd is in use, consider explicitly setting # "connect_timeout" in the conninfo string to determine # the length of time which elapses before a network @@ -44,8 +45,9 @@ # Replication settings #------------------------------------------------------------------------------ +#replication_type=physical # Must be one of 'physical' or 'bdr' -#upstream_node_id=1 # When using cascading replication, a standby +#upstream_node_id= # When using cascading replication, a standby # can connect to another upstream standby node # which is specified by setting 'upstream_node_id'. # In that case, the upstream node must exist diff --git a/repmgr.h b/repmgr.h index 23a090fd..a627e413 100644 --- a/repmgr.h +++ b/repmgr.h @@ -30,8 +30,6 @@ #define REPLICATION_TYPE_PHYSICAL 1 #define REPLICATION_TYPE_BDR 2 -#define BDR_MONITORING_LOCAL 1 -#define BDR_MONITORING_PRIORITY 2 #define DEFAULT_LOCATION "default" #define DEFAULT_PRIORITY 100 @@ -52,4 +50,5 @@ #define ERRBUFF_SIZE 512 + #endif /* _REPMGR_H_ */ diff --git a/repmgrd-bdr.c b/repmgrd-bdr.c index a0df1964..9a6f0656 100644 --- a/repmgrd-bdr.c +++ b/repmgrd-bdr.c @@ -14,6 +14,9 @@ static volatile sig_atomic_t got_SIGHUP = false; +static void do_bdr_failover(NodeInfoList *nodes); + + void do_bdr_node_check(void) { @@ -25,12 +28,8 @@ void monitor_bdr(void) { NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; - PGconn *monitoring_conn = NULL; - t_node_info *monitored_node = NULL; t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER; - RecordStatus record_status; - bool failover_done = false; /* sanity check local database */ log_info(_("connecting to local database '%s'"), @@ -118,7 +117,6 @@ monitor_bdr(void) } /* Log startup event */ - create_event_record(local_conn, &config_file_options, config_file_options.node_id, @@ -128,158 +126,72 @@ monitor_bdr(void) /* * retrieve list of nodes - we'll need these if the DB connection goes away, - * or if we're monitoring a non-local node */ - get_node_records_by_priority(local_conn, &nodes); + get_all_node_records(local_conn, &nodes); - /* decided which node to monitor */ + log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id); - if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_LOCAL) - { - // if local, reuse local_conn and node info - //record_status = get_node_record(local_conn, config_file_options.node_id, &monitored_node); - monitored_node = &local_node_info; - - monitoring_conn = establish_db_connection(monitored_node->conninfo, false); - log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id); - } - else - { - NodeInfoListCell *cell; - - for (cell = nodes.head; cell; cell = cell->next) - { - log_debug("main_loop_bdr() checking node %s %i", cell->node_info->node_name, cell->node_info->priority); - - monitoring_conn = establish_db_connection(cell->node_info->conninfo, false); - if (PQstatus(monitoring_conn) == CONNECTION_OK) - { - log_debug("main_loop_bdr() monitoring node '%s' (ID %i, priority %i)", - cell->node_info->node_name, cell->node_info->node_id, cell->node_info->priority); - /* fetch the record again, as the node list is transient */ - monitored_node = get_node_record_pointer(monitoring_conn, cell->node_info->node_id); - - break; - } - } - } - - // check monitored_node not null! + log_info(_("starting continuous bdr node monitoring")); while (true) { - /* normal state - connection active */ - if (PQstatus(monitoring_conn) == CONNECTION_OK) + + /* monitoring loop */ + log_verbose(LOG_DEBUG, "bdr check loop..."); + + switch (monitoring_state) { - // XXX detail - log_info(_("starting continuous bdr node monitoring")); - - /* monitoring loop */ - do + case MS_NORMAL: { - log_verbose(LOG_DEBUG, "bdr check loop..."); - + if (is_server_available(local_node_info.conninfo) == false) { - NodeInfoListCell *cell; - - for (cell = nodes.head; cell; cell = cell->next) - { - log_debug("bdr_monitor() %s", cell->node_info->node_name); - } - } - - if (is_server_available(monitored_node->conninfo) == false) - { - t_node_info *new_monitored_node; - // XXX improve log_warning("connection problem!"); - new_monitored_node = do_bdr_failover(&nodes, monitored_node); - - if (new_monitored_node != NULL) - { - pfree(monitored_node); - monitored_node = new_monitored_node; - } - log_notice(_("monitored_node->node_name is now '%s' \n"), monitored_node->node_name); + do_bdr_failover(&nodes); } else { + log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")", + config_file_options.monitor_interval_secs); sleep(config_file_options.monitor_interval_secs); } - - if (got_SIGHUP) - { - /* - * if we can reload, then could need to change - * local_conn - */ - if (reload_config(&config_file_options)) - { - PQfinish(local_conn); - local_conn = establish_db_connection(config_file_options.conninfo, true); - update_registration(local_conn); - } - - /* reload node list */ - get_node_records_by_priority(local_conn, &nodes); - - got_SIGHUP = false; - } - - } while (!failover_done); - } - /* local connection inactive - periodically try and connect */ - /* TODO: make this an option */ - else - { - - monitoring_conn = establish_db_connection(monitored_node->conninfo, false); - - if (PQstatus(monitoring_conn) == CONNECTION_OK) + } + case MS_DEGRADED: { - // XXX event bdr_node_recovered -> if monitored == local node - - if (monitored_node->node_id == config_file_options.node_id) + /* degraded monitoring */ + if (is_server_available(local_node_info.conninfo) == true) { - log_notice(_("local connection has returned, resuming monitoring")); + log_notice(_("monitored node %i has recovered"), local_node_info.node_id); + // do_bdr_recovery() } else { - log_notice(_("connection to '%s' has returned, resuming monitoring"), monitored_node->node_name); + log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")", + config_file_options.monitor_interval_secs); + sleep(config_file_options.monitor_interval_secs); } } - else - { - sleep(config_file_options.monitor_interval_secs); - } - - - if (got_SIGHUP) - { - /* - * if we can reload, then could need to change - * local_conn - */ - if (reload_config(&config_file_options)) - { - if (PQstatus(local_conn) == CONNECTION_OK) - { - PQfinish(local_conn); - local_conn = establish_db_connection(config_file_options.conninfo, true); - update_registration(local_conn); - } - } - - /* reload node list */ - if (PQstatus(local_conn) == CONNECTION_OK) - get_node_records_by_priority(local_conn, &nodes); - - got_SIGHUP = false; - } } - failover_done = false; + if (got_SIGHUP) + { + /* + * if we can reload, then could need to change + * local_conn + */ + if (reload_config(&config_file_options)) + { + PQfinish(local_conn); + local_conn = establish_db_connection(config_file_options.conninfo, true); + update_registration(local_conn); + } + + /* reload node list */ + get_all_node_records(local_conn, &nodes); + + got_SIGHUP = false; + } + } return; @@ -294,43 +206,44 @@ monitor_bdr(void) * we'll do the following: * * - attempt to find another node, to set our node record as inactive + * (there should be only one other node) * - generate an event log record on that node * - optionally execute `bdr_failover_command`, passing the conninfo string * of that node to the command; this can be used for e.g. reconfiguring * pgbouncer. - * - if mode is 'BDR_MONITORING_PRIORITY', redirect monitoring to that node. * */ -t_node_info * -do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node) + +void +do_bdr_failover(NodeInfoList *nodes) { PGconn *next_node_conn = NULL; NodeInfoListCell *cell; bool failover_success = false; PQExpBufferData event_details; + RecordStatus record_status; t_event_info event_info = T_EVENT_INFO_INITIALIZER; - t_node_info *new_monitored_node = NULL; + t_node_info target_node = T_NODE_INFO_INITIALIZER; initPQExpBuffer(&event_details); - /* get next active priority node */ + /* get next active node */ for (cell = nodes->head; cell; cell = cell->next) { log_debug("do_bdr_failover() %s", cell->node_info->node_name); /* don't attempt to connect to the current monitored node, as that's the one which has failed */ - if (cell->node_info->node_id == monitored_node->node_id) + if (cell->node_info->node_id == local_node_info.node_id) continue; /* XXX skip inactive node? */ - next_node_conn = establish_db_connection(cell->node_info->conninfo, false); if (PQstatus(next_node_conn) == CONNECTION_OK) { // XXX check if record returned - new_monitored_node = get_node_record_pointer(next_node_conn, cell->node_info->node_id); + record_status = get_node_record(next_node_conn, cell->node_info->node_id, &target_node); break; } @@ -345,40 +258,34 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node) log_error("%s", event_details.data); - // no other nodes found // continue degraded monitoring until node is restored? } else { - log_info(_("connecting to target node %s"), cell->node_info->node_name); + log_info(_("connecting to target node %s"), target_node.node_name); failover_success = true; - event_info.conninfo_str = cell->node_info->conninfo; - event_info.node_name = cell->node_info->node_name; + event_info.conninfo_str = target_node.conninfo; + event_info.node_name = target_node.node_name; /* update our own record on the other node */ - if (monitored_node->node_id == config_file_options.node_id) - { - update_node_record_set_active(next_node_conn, monitored_node->node_id, false); - } - - if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY) - { - log_notice(_("monitoring next available node by prioriy: %s (ID %i)"), - new_monitored_node->node_name, - new_monitored_node->node_id); - } + update_node_record_set_active(next_node_conn, local_node_info.node_id, false); appendPQExpBuffer(&event_details, _("node '%s' (ID: %i) detected as failed; next available node is '%s' (ID: %i)"), - monitored_node->node_name, - monitored_node->node_id, - cell->node_info->node_name, - cell->node_info->node_id); + local_node_info.node_name, + local_node_info.node_id, + target_node.node_name, + target_node.node_id); } + monitoring_state = MS_DEGRADED; + INSTR_TIME_SET_CURRENT(degraded_monitoring_start); + + // check here that the node hasn't come back up... + /* * Create an event record * @@ -400,11 +307,7 @@ do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node) termPQExpBuffer(&event_details); - //failover_done = true; - - if (config_file_options.bdr_monitoring_mode == BDR_MONITORING_PRIORITY) - return new_monitored_node; /* local monitoring mode - there's no new node to monitor */ - return NULL; + return; } diff --git a/repmgrd-bdr.h b/repmgrd-bdr.h index 6bbe6257..ecc0986e 100644 --- a/repmgrd-bdr.h +++ b/repmgrd-bdr.h @@ -8,6 +8,5 @@ extern void do_bdr_node_check(void); extern void monitor_bdr(void); -extern t_node_info *do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node); #endif /* _REPMGRD_BDR_H_ */ diff --git a/repmgrd-physical.c b/repmgrd-physical.c index 1384a876..4efa4d4b 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -325,7 +325,11 @@ monitor_streaming_primary(void) INSTR_TIME_SET_CURRENT(log_status_interval_start); } } - sleep(1); + + log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")", + config_file_options.monitor_interval_secs); + + sleep(config_file_options.monitor_interval_secs); } } diff --git a/repmgrd.c b/repmgrd.c index 18faad3f..00335f75 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -37,7 +37,6 @@ PGconn *local_conn = NULL; /* Collate command line errors here for friendlier reporting */ static ItemList cli_errors = { NULL, NULL }; -int server_version_num = 0; bool startup_event_logged = false; MonitoringState monitoring_state = MS_NORMAL; @@ -57,7 +56,6 @@ static void show_usage(void); static void daemonize_process(void); static void check_and_create_pid_file(const char *pid_file); - static void start_monitoring(void); diff --git a/repmgrd.h b/repmgrd.h index eb651743..56b6b3b8 100644 --- a/repmgrd.h +++ b/repmgrd.h @@ -27,7 +27,6 @@ extern t_configuration_options config_file_options; extern t_node_info local_node_info; extern PGconn *local_conn; extern bool startup_event_logged; -extern int server_version_num; PGconn *try_reconnect(const char *conninfo, NodeStatus *node_status);