From a6cc4d80f0e63d73dc512451a863447eeba19d2b Mon Sep 17 00:00:00 2001
From: Ian Barwick
Date: Mon, 13 Nov 2017 14:53:12 +0900
Subject: [PATCH] Add "witness register" functionality

---
 configfile.c            |  10 ++
 configfile.h            |   5 +
 dbutils.c               | 123 ++++++++++++++++++-
 dbutils.h               |   6 ++
 repmgr-action-cluster.c |   1 +
 repmgr-action-standby.c |   3 +-
 repmgr-action-witness.c | 252 ++++++++++++++++++++++++++++++++++++++++
 repmgr-client.c         |  18 +++-
 repmgr.conf.sample      |   5 +
 repmgr.h                |  28 ++---
 repmgrd-physical.c      |   9 ++
 repmgrd-physical.h      |   1 +
 repmgrd.c               |   3 +
 13 files changed, 441 insertions(+), 23 deletions(-)

diff --git a/configfile.c b/configfile.c
index 99cc4622..8c02d640 100644
--- a/configfile.c
+++ b/configfile.c
@@ -338,6 +338,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 	options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
 	options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
 
+	/*-------------
+	 * witness settings
+	 *-------------
+	 */
+	options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;
+
 	/*-------------
 	 * BDR settings
 	 *-------------
@@ -551,6 +557,10 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
 		else if (strcmp(name, "primary_follow_timeout") == 0)
 			options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
 
+		/* witness settings */
+		else if (strcmp(name, "witness_sync_interval") == 0)
+			options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);
+
 		/* BDR settings */
 		else if (strcmp(name, "bdr_local_monitoring_only") == 0)
 			options->bdr_local_monitoring_only = parse_bool(value, name, error_list);
diff --git a/configfile.h b/configfile.h
index e122636f..59185b76 100644
--- a/configfile.h
+++ b/configfile.h
@@ -98,6 +98,9 @@ typedef struct
 	int			replication_lag_warning;
 	int			replication_lag_critical;
 
+	/* witness settings */
+	int			witness_sync_interval;
+
 	/* repmgrd settings */
 	failover_mode_opt failover;
 	char		location[MAXLEN];
@@ -158,6 +161,8 @@ typedef struct
 		/* node check settings */ \
 		DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
 		DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
+		/* witness settings */ \
+		DEFAULT_WITNESS_SYNC_INTERVAL, \
 		/* repmgrd settings */ \
 		FAILOVER_MANUAL, DEFAULT_LOCATION, DEFAULT_PRIORITY, "", "", \
 		DEFAULT_MONITORING_INTERVAL, \
diff --git a/dbutils.c b/dbutils.c
index ee29325f..d7fac046 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -50,6 +50,7 @@ static void _populate_node_records(PGresult *res, NodeInfoList *node_list);
 static bool _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info);
 static bool _create_event(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details, t_event_info *event_info, bool send_notification);
 
+static bool _is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet);
 static void _populate_bdr_node_record(PGresult *res, t_bdr_node_info *node_info, int row);
 static void _populate_bdr_node_records(PGresult *res, BdrNodeInfoList *node_list);
 
@@ -1816,6 +1817,10 @@ parse_node_type(const char *type)
 	{
 		return STANDBY;
 	}
+	else if (strcmp(type, "witness") == 0)
+	{
+		return WITNESS;
+	}
 	else if (strcmp(type, "bdr") == 0)
 	{
 		return BDR;
@@ -1834,6 +1839,8 @@ get_node_type_string(t_server_type type)
 			return "primary";
 		case STANDBY:
 			return "standby";
+		case WITNESS:
+			return "witness";
 		case BDR:
 			return "bdr";
 			/* this should never happen */
@@ -2484,6 +2491,75 @@ update_node_record_conn_priority(PGconn *conn, t_configuration_options *options)
 }
 
 
+/*
+ * Copy node records from primary to witness servers.
+ *
+ * This is used when initially registering a witness server, and
+ * by repmgrd to update the node records when required.
+ *
+ * The existing records on the witness are truncated and replaced inside
+ * a transaction opened on "witness_conn". If deferring constraints,
+ * truncating, or inserting any record fails, that transaction is rolled
+ * back and false is returned; otherwise it is committed and true returned.
+ */
+
+bool
+witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn)
+{
+	PGresult   *res = NULL;
+	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
+	NodeInfoListCell *cell = NULL;
+
+	begin_transaction(witness_conn);
+
+	/* Defer constraints */
+
+	res = PQexec(witness_conn, "SET CONSTRAINTS ALL DEFERRED");
+	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_error(_("unable to defer constraints:\n %s"),
+				  PQerrorMessage(witness_conn));
+		/* PQclear() is a no-op on NULL, so this is safe if PQexec() returned NULL */
+		PQclear(res);
+		rollback_transaction(witness_conn);
+
+		return false;
+	}
+
+	PQclear(res);
+
+	/* truncate existing records */
+
+	if (truncate_node_records(witness_conn) == false)
+	{
+		rollback_transaction(witness_conn);
+
+		return false;
+	}
+
+	get_all_node_records(primary_conn, &nodes);
+
+	for (cell = nodes.head; cell; cell = cell->next)
+	{
+		/* don't commit a partial copy - abandon the whole sync instead */
+		if (create_node_record(witness_conn, NULL, cell->node_info) == false)
+		{
+			rollback_transaction(witness_conn);
+			clear_node_info_list(&nodes);
+
+			return false;
+		}
+	}
+
+	clear_node_info_list(&nodes);
+
+	/* and done */
+	commit_transaction(witness_conn);
+
+	return true;
+}
+
+
 bool
 delete_node_record(PGconn *conn, int node)
 {
@@ -2515,6 +2591,29 @@ delete_node_record(PGconn *conn, int node)
 
 	return true;
 }
+
+/*
+ * Remove all rows from repmgr.nodes using the supplied connection.
+ * Logs the error and returns false if the TRUNCATE command fails.
+ */
+bool
+truncate_node_records(PGconn *conn)
+{
+	PGresult   *res = NULL;
+
+	res = PQexec(conn, "TRUNCATE TABLE repmgr.nodes");
+
+	if (PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_error(_("unable to truncate node record table:\n %s"),
+				  PQerrorMessage(conn));
+		PQclear(res);
+		return false;
+	}
+
+	PQclear(res);
+	return true;
+}
 
 void
 get_node_replication_stats(PGconn *conn, t_node_info *node_info)
@@ -3906,8 +4005,8 @@ get_last_wal_receive_location(PGconn *conn)
 /* BDR functions */
 /* ============= */
 
-bool
-is_bdr_db(PGconn *conn, PQExpBufferData *output)
+static bool
+_is_bdr_db(PGconn *conn, PQExpBufferData *output, bool quiet)
 {
 	PQExpBufferData query;
 	PGresult   *res = NULL;
@@ -3938,7 +4037,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
 
 		if (output != NULL)
 			appendPQExpBuffer(output, "%s", warning);
-		else
+		else if (quiet == false)
 			log_warning("%s", warning);
 
 		return is_bdr_db;
@@ -3959,7 +4058,7 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
 
 		if (output != NULL)
 			appendPQExpBuffer(output, "%s", warning);
-		else
+		else if (quiet == false)
 			log_warning("%s", warning);
 	}
 
@@ -3968,6 +4067,22 @@ is_bdr_db(PGconn *conn, PQExpBufferData *output)
 
 	return is_bdr_db;
 }
+
+/* Wrapper for _is_bdr_db(): warning text is appended to "output" if supplied, otherwise logged */
+bool
+is_bdr_db(PGconn *conn, PQExpBufferData *output)
+{
+	return _is_bdr_db(conn, output, false);
+}
+
+/* Wrapper for _is_bdr_db() with no output buffer and quiet set, so no warning is logged */
+bool
+is_bdr_db_quiet(PGconn *conn)
+{
+	return _is_bdr_db(conn, NULL, true);
+}
+
+
 
 bool
 is_active_bdr_node(PGconn *conn, const char *node_name)
diff --git a/dbutils.h b/dbutils.h
index 02618757..93727cdd 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -38,6 +38,7 @@ typedef enum
 	UNKNOWN = 0,
 	PRIMARY,
 	STANDBY,
+	WITNESS,
 	BDR
 } t_server_type;
 
@@ -413,6 +414,7 @@ void get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
 bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
 bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
 bool delete_node_record(PGconn *conn, int node);
+bool truncate_node_records(PGconn *conn);
 
 bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
 bool update_node_record_set_primary(PGconn *conn, int this_node_id);
@@ -420,6 +422,9 @@ bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_up
 bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
 bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
 
+bool witness_copy_node_records(PGconn *primary_conn, PGconn *witness_conn);
+
+
 void clear_node_info_list(NodeInfoList *nodes);
 
 /* PostgreSQL configuration file location functions */
@@ -489,6 +494,7 @@ bool is_downstream_node_attached(PGconn *conn, char *node_name);
 void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
 RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
 bool is_bdr_db(PGconn *conn, PQExpBufferData *output);
+bool is_bdr_db_quiet(PGconn *conn);
 bool is_active_bdr_node(PGconn *conn, const char *node_name);
 bool is_bdr_repmgr(PGconn *conn);
 bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
diff --git a/repmgr-action-cluster.c b/repmgr-action-cluster.c
index 107d5363..2a9e7944 100644
--- a/repmgr-action-cluster.c
+++ b/repmgr-action-cluster.c
@@ -279,6 +279,7 @@ do_cluster_show(void)
 					}
 				}
 				break;
+			case WITNESS:
 			case BDR:
 				{
 					/* node is reachable */
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 606a776b..eff28b76 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -806,7 +806,6 @@ do_standby_register(void)
 		}
 	}
 
-
 	if (PQstatus(conn) == CONNECTION_OK)
 	{
 		check_recovery_type(conn);
@@ -838,7 +837,7 @@ do_standby_register(void)
 	if (PQstatus(primary_conn) != CONNECTION_OK)
 	{
 		log_error(_("unable to connect to the primary database"));
-		log_hint(_("a primary must be configured before registering a standby"));
+		log_hint(_("a primary node must be configured before registering a standby node"));
 		exit(ERR_BAD_CONFIG);
 	}
 
diff --git a/repmgr-action-witness.c b/repmgr-action-witness.c
index b69ef24b..3abfcd4e 100644
--- a/repmgr-action-witness.c
+++ b/repmgr-action-witness.c
@@ -31,6 +31,258 @@
 void
 do_witness_register(void)
 {
+	/*
+	 * Register the local node as a witness server.
+	 *
+	 * Both the local node and the primary given on the command line must be
+	 * reachable, must not be standbys and must not be BDR nodes. The witness
+	 * node record is created (or, with -F/--force, updated) on the primary,
+	 * then all node records are copied from the primary to the witness.
+	 */
+
+	PGconn	   *witness_conn = NULL;
+	PGconn	   *primary_conn = NULL;
+	RecoveryType recovery_type = RECTYPE_UNKNOWN;
+	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
+	t_node_info node_record = T_NODE_INFO_INITIALIZER;
+	RecordStatus record_status = RECORD_NOT_FOUND;
+	bool		record_created = false;
+
+	char		repmgr_user[MAXLEN];
+	char		repmgr_db[MAXLEN];
+
+	log_info(_("connecting to witness node \"%s\" (ID: %i)"),
+			 config_file_options.node_name,
+			 config_file_options.node_id);
+
+	witness_conn = establish_db_connection_quiet(config_file_options.conninfo);
+
+	if (PQstatus(witness_conn) != CONNECTION_OK)
+	{
+		log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
+				  config_file_options.node_name,
+				  config_file_options.node_id);
+		log_detail("%s",
+				   PQerrorMessage(witness_conn));
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check witness node's recovery type */
+	recovery_type = get_recovery_type(witness_conn);
+
+	if (recovery_type == RECTYPE_STANDBY)
+	{
+		log_error(_("provided node is a standby"));
+		log_hint(_("a witness node must run on an independent primary server"));
+
+		PQfinish(witness_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check that witness node is not a BDR node */
+	if (is_bdr_db_quiet(witness_conn) == true)
+	{
+		log_error(_("witness node is a BDR node"));
+		log_hint(_("a witness node cannot be configured for a BDR cluster"));
+
+		PQfinish(witness_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+
+	/* connect to primary with provided parameters */
+	log_info(_("connecting to primary node"));
+	/*
+	 * Extract the repmgr user and database names from the conninfo string
+	 * provided in repmgr.conf
+	 */
+	get_conninfo_value(config_file_options.conninfo, "user", repmgr_user);
+	get_conninfo_value(config_file_options.conninfo, "dbname", repmgr_db);
+
+	param_set_ine(&source_conninfo, "user", repmgr_user);
+	param_set_ine(&source_conninfo, "dbname", repmgr_db);
+
+	/* We need to connect to check configuration and copy it */
+	primary_conn = establish_db_connection_by_params(&source_conninfo, false);
+
+	if (PQstatus(primary_conn) != CONNECTION_OK)
+	{
+		log_error(_("unable to connect to the primary node"));
+		log_hint(_("a primary node must be configured before registering a witness node"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check primary node's recovery type */
+	recovery_type = get_recovery_type(primary_conn);
+
+	if (recovery_type == RECTYPE_STANDBY)
+	{
+		log_error(_("provided primary node is a standby"));
+		log_hint(_("provide the connection details of the cluster's primary server"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* check that primary node is not a BDR node */
+	if (is_bdr_db_quiet(primary_conn) == true)
+	{
+		log_error(_("primary node is a BDR node"));
+		log_hint(_("a witness node cannot be configured for a BDR cluster"));
+
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* XXX sanity check witness node is not part of main cluster */
+
+
+	/* create repmgr extension, if does not exist */
+	if (!create_repmgr_extension(witness_conn))
+	{
+		PQfinish(witness_conn);
+		PQfinish(primary_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/*
+	 * check if node record exists on primary, overwrite if -F/--force provided,
+	 * otherwise exit with error
+	 */
+
+	record_status = get_node_record(primary_conn,
+									config_file_options.node_id,
+									&node_record);
+
+	if (record_status == RECORD_FOUND)
+	{
+		/*
+		 * If node is not a witness, cowardly refuse to do anything, let the
+		 * user work out what's the correct thing to do.
+		 */
+		if (node_record.type != WITNESS)
+		{
+			log_error(_("node \"%s\" (ID: %i) is already registered as a %s node"),
+					  config_file_options.node_name,
+					  config_file_options.node_id,
+					  get_node_type_string(node_record.type));
+			log_hint(_("use \"repmgr %s unregister\" to remove a non-witness node record"),
+					 get_node_type_string(node_record.type));
+
+			PQfinish(witness_conn);
+			PQfinish(primary_conn);
+
+			exit(ERR_BAD_CONFIG);
+		}
+
+		if (!runtime_options.force)
+		{
+			log_error(_("witness node is already registered"));
+			log_hint(_("use option -F/--force to reregister the node"));
+
+
+			PQfinish(witness_conn);
+			PQfinish(primary_conn);
+
+			exit(ERR_BAD_CONFIG);
+		}
+	}
+
+
+	/* XXX check other node with same name does not exist */
+
+	/*
+	 * if the witness's repmgr.nodes contains entries, delete if -F/--force
+	 * provided, otherwise exit with error
+	 */
+	get_all_node_records(witness_conn, &nodes);
+
+	log_verbose(LOG_DEBUG, "%i node records found", nodes.node_count);
+
+	if (nodes.node_count > 0)
+	{
+		if (!runtime_options.force)
+		{
+			log_error(_("witness node is already initialised"));
+			log_hint(_("use option -F/--force to reinitialise the node"));
+			PQfinish(primary_conn);
+			PQfinish(witness_conn);
+			exit(ERR_BAD_CONFIG);
+		}
+
+		if (truncate_node_records(witness_conn) == false)
+		{
+			PQfinish(primary_conn);
+			PQfinish(witness_conn);
+			exit(ERR_BAD_CONFIG);
+		}
+	}
+
+	clear_node_info_list(&nodes);
+
+	/* create or update the witness node record on the primary */
+
+	init_node_record(&node_record);
+	node_record.type = WITNESS;
+	node_record.priority = 0;
+	node_record.upstream_node_id = NO_UPSTREAM_NODE;
+
+	if (record_status == RECORD_FOUND)
+	{
+		/*
+		 * node record exists - update it (at this point we have already
+		 * established that -F/--force is in use)
+		 */
+		record_created = update_node_record(primary_conn,
+											"witness register",
+											&node_record);
+	}
+	else
+	{
+		record_created = create_node_record(primary_conn,
+											"witness register",
+											&node_record);
+	}
+
+	if (record_created == false)
+	{
+		log_error(_("unable to create or update the witness node record on the primary"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* sync records */
+	if (witness_copy_node_records(primary_conn, witness_conn) == false)
+	{
+		log_error(_("unable to copy repmgr node records from primary"));
+		PQfinish(primary_conn);
+		PQfinish(witness_conn);
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/* create event */
+
+	PQfinish(primary_conn);
+
+	PQfinish(witness_conn);
+
+	log_info(_("witness registration complete"));
+	log_notice(_("witness node \"%s\" (ID: %i) successfully registered"),
+			   config_file_options.node_name, config_file_options.node_id);
+
 	return;
 }
 
diff --git a/repmgr-client.c b/repmgr-client.c
index 60966769..f08dddc5 100644
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -1346,6 +1346,16 @@ check_cli_parameters(const int action)
 				 */
 			}
 			break;
+		case WITNESS_REGISTER:
+			{
+				if (!runtime_options.host_param_provided)
+				{
+					item_list_append_format(&cli_errors,
+											_("host name for the source node must be provided when executing %s"),
+											action_name(action));
+				}
+			}
+			break;
 		case NODE_STATUS:
 			if (runtime_options.node_id != UNKNOWN_NODE_ID)
 			{
@@ -1689,6 +1699,11 @@ action_name(const int action)
 		case STANDBY_FOLLOW:
 			return "STANDBY FOLLOW";
 
+		case WITNESS_REGISTER:
+			return "WITNESS REGISTER";
+		case WITNESS_UNREGISTER:
+			return "WITNESS UNREGISTER";
+
 		case BDR_REGISTER:
 			return "BDR REGISTER";
 		case BDR_UNREGISTER:
@@ -2670,7 +2685,6 @@ init_node_record(t_node_info *node_record)
 	node_record->priority = config_file_options.priority;
 	node_record->active = true;
 
-
 	if (config_file_options.location[0] != '\0')
 		strncpy(node_record->location, config_file_options.location, MAXLEN);
 	else
@@ -2699,6 +2713,4 @@ init_node_record(t_node_info *node_record)
 	{
 		maxlen_snprintf(node_record->slot_name, "repmgr_slot_%i", config_file_options.node_id);
 	}
-
-
 }
diff --git a/repmgr.conf.sample b/repmgr.conf.sample
index bb08ed87..339a8f4e 100644
--- a/repmgr.conf.sample
+++ b/repmgr.conf.sample
@@ -68,7 +68,12 @@
 
 #recovery_min_apply_delay=      # If provided, "recovery_min_apply_delay" in recovery.conf
                                 # will be set to this value.
+#------------------------------------------------------------------------------
+# Witness server settings
+#------------------------------------------------------------------------------
 
+#witness_sync_interval=15       # interval (in seconds) to synchronise node records
+                                # to the witness server
 
 #------------------------------------------------------------------------------
 # Logging settings
diff --git a/repmgr.h b/repmgr.h
index 0e439717..0a2b00c8 100644
--- a/repmgr.h
+++ b/repmgr.h
@@ -61,20 +61,20 @@
  * various default values - ensure repmgr.conf.sample is update
  * if any of these are changed
  */
-#define DEFAULT_LOCATION "default"
-#define DEFAULT_PRIORITY 100
-#define DEFAULT_RECONNECTION_ATTEMPTS 6 /* seconds */
-#define DEFAULT_RECONNECTION_INTERVAL 10 /* seconds */
-#define DEFAULT_MONITORING_INTERVAL 2 /* seconds */
-#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
-#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
-#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
-#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
-#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
-#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
-#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
-#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
-
+#define DEFAULT_LOCATION						"default"
+#define DEFAULT_PRIORITY						100
+#define DEFAULT_RECONNECTION_ATTEMPTS			6	/* attempts (not seconds?) - TODO confirm */
+#define DEFAULT_RECONNECTION_INTERVAL			10	/* seconds */
+#define DEFAULT_MONITORING_INTERVAL				2	/* seconds */
+#define DEFAULT_ASYNC_QUERY_TIMEOUT				60	/* seconds */
+#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT	60	/* seconds */
+#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT			60	/* seconds */
+#define DEFAULT_BDR_RECOVERY_TIMEOUT			30	/* seconds */
+#define DEFAULT_ARCHIVE_READY_WARNING			16	/* WAL files */
+#define DEFAULT_ARCHIVE_READY_CRITICAL			128 /* WAL files */
+#define DEFAULT_REPLICATION_LAG_WARNING			300 /* seconds */
+#define DEFAULT_REPLICATION_LAG_CRITICAL		600 /* seconds */
+#define DEFAULT_WITNESS_SYNC_INTERVAL			15	/* seconds */
 
 #ifndef RECOVERY_COMMAND_FILE
 #define RECOVERY_COMMAND_FILE "recovery.conf"
diff --git a/repmgrd-physical.c b/repmgrd-physical.c
index 2e456715..1d5fa050 100644
--- a/repmgrd-physical.c
+++ b/repmgrd-physical.c
@@ -1012,6 +1012,15 @@ loop:
 #endif
 }
 
+
+/* Stub: witness monitoring is not implemented yet; this only logs an error. */
+void
+monitor_streaming_witness(void)
+{
+	log_error("not yet implemented");
+	return;
+}
+
 #ifndef BDR_ONLY
 static bool
 do_primary_failover(void)
diff --git a/repmgrd-physical.h b/repmgrd-physical.h
index 2eac428a..a20060d2 100644
--- a/repmgrd-physical.h
+++ b/repmgrd-physical.h
@@ -23,6 +23,7 @@ void do_physical_node_check(void);
 
 void monitor_streaming_primary(void);
 void monitor_streaming_standby(void);
+void monitor_streaming_witness(void);
 void close_connections_physical(void);
 
 #endif /* _REPMGRD_PHYSICAL_H_ */
diff --git a/repmgrd.c b/repmgrd.c
index 8f88aeaa..b9407fb2 100644
--- a/repmgrd.c
+++ b/repmgrd.c
@@ -407,6 +407,9 @@ start_monitoring(void)
 		case STANDBY:
 			monitor_streaming_standby();
 			break;
+		case WITNESS:
+			monitor_streaming_witness();
+			break;
 #else
 		case PRIMARY:
 		case STANDBY: