From 0ebd9c15d953c24d3e75828183af3dd4d6e3cf52 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 29 Mar 2016 16:46:18 +0900 Subject: [PATCH] Regularly sync witness server repl_nodes table. Although the witness server will resync the repl_nodes table following a failover, other operations (e.g. removing or cloning a standby) were previously not reflected in the witness server's copy of this table. As a short-term workaround, automatically resync the table at regular intervals (defined by the configuration file parameter "witness_repl_nodes_sync_interval_secs", default 30 seconds). --- config.c | 5 +++++ config.h | 3 ++- dbutils.c | 20 ++++++++++++-------- dbutils.h | 2 +- repmgr.c | 11 ++++++----- repmgr.conf.sample | 3 +++ repmgrd.c | 26 +++++++++++++++++++++++++- 7 files changed, 54 insertions(+), 16 deletions(-) diff --git a/config.c b/config.c index 46ab9c25..c32166bb 100644 --- a/config.c +++ b/config.c @@ -235,6 +235,9 @@ parse_config(t_configuration_options *options) options->monitor_interval_secs = 2; options->retry_promote_interval_secs = 300; + /* default to resyncing repl_nodes table every 30 seconds on the witness server */ + options->witness_repl_nodes_sync_interval_secs = 30; + memset(options->event_notification_command, 0, sizeof(options->event_notification_command)); options->tablespace_mapping.head = NULL; @@ -358,6 +361,8 @@ parse_config(t_configuration_options *options) options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false); else if (strcmp(name, "retry_promote_interval_secs") == 0) options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false); + else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0) + options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false); else if (strcmp(name, "use_replication_slots") == 0) /* XXX we should have a dedicated boolean argument 
format */ options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false); diff --git a/config.h b/config.h index 3d65637f..b5f3099b 100644 --- a/config.h +++ b/config.h @@ -75,13 +75,14 @@ typedef struct char logfile[MAXLEN]; int monitor_interval_secs; int retry_promote_interval_secs; + int witness_repl_nodes_sync_interval_secs; int use_replication_slots; char event_notification_command[MAXLEN]; EventNotificationList event_notifications; TablespaceList tablespace_mapping; } t_configuration_options; -#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } +#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} } typedef struct ErrorListCell { diff --git a/dbutils.c b/dbutils.c index 0b12517d..9bc8792d 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1138,7 +1138,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) } sqlquery_snprintf(sqlquery, - "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes", + "SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes", get_repmgr_schema_quoted(masterconn)); log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery); @@ -1158,7 +1158,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) log_verbose(LOG_DEBUG, "copy_configuration(): writing node record for node %s (id: %s)\n", - PQgetvalue(res, i, 4), + PQgetvalue(res, i, 3), PQgetvalue(res, i, 0)); node_record_created = create_node_record(witnessconn, @@ -1174,7 +1174,10 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) atoi(PQgetvalue(res, i, 5)), strlen(PQgetvalue(res, i, 6)) ? 
PQgetvalue(res, i, 6) - : NULL + : NULL, + (strcmp(PQgetvalue(res, i, 7), "t") == 0) + ? true + : false ); if (node_record_created == false) @@ -1200,7 +1203,7 @@ copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name) * XXX we should pass the record parameters as a struct. */ bool -create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name) +create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active) { char sqlquery[QUERY_STR_LEN]; char upstream_node_id[MAXLEN]; @@ -1241,8 +1244,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " " (id, type, upstream_node_id, cluster, " - " name, conninfo, slot_name, priority) " - "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ", + " name, conninfo, slot_name, priority, active) " + "VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ", get_repmgr_schema_quoted(conn), node, type, @@ -1251,7 +1254,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea node_name, conninfo, slot_name_buf, - priority); + priority, + active == true ? 
"TRUE" : "FALSE"); log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery); @@ -1291,7 +1295,7 @@ delete_node_record(PGconn *conn, int node, char *action) if (action != NULL) { - log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action); + log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action); } res = PQexec(conn, sqlquery); diff --git a/dbutils.h b/dbutils.h index df9f1065..bc34d59c 100644 --- a/dbutils.h +++ b/dbutils.h @@ -122,7 +122,7 @@ bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint); bool stop_backup(PGconn *conn, char *last_wal_segment); bool set_config_bool(PGconn *conn, const char *config_param, bool state); bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name); -bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name); +bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active); bool delete_node_record(PGconn *conn, int node, char *action); int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info); bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active); diff --git a/repmgr.c b/repmgr.c index 5634d80e..98304442 100644 --- a/repmgr.c +++ b/repmgr.c @@ -1071,7 +1071,8 @@ do_master_register(void) options.node_name, options.conninfo, options.priority, - repmgr_slot_name_ptr); + repmgr_slot_name_ptr, + true); if (record_created == false) { @@ -1172,9 +1173,8 @@ do_standby_register(void) options.node_name, options.conninfo, options.priority, - repmgr_slot_name_ptr); - - + repmgr_slot_name_ptr, + true); if (record_created == false) { @@ -3784,7 +3784,8 @@ do_witness_create(void) options.node_name, 
options.conninfo, options.priority, - NULL); + NULL, + true); if (record_created == false) { diff --git a/repmgr.conf.sample b/repmgr.conf.sample index cba88886..268a41e1 100644 --- a/repmgr.conf.sample +++ b/repmgr.conf.sample @@ -146,3 +146,6 @@ monitor_interval_secs=2 # seconds; by default this would be half an hour, as 'retry_promote_interval_secs' # default value is 300) retry_promote_interval_secs=300 + +# Number of seconds after which the witness server resyncs the repl_nodes table (default: 30) +witness_repl_nodes_sync_interval_secs=15 diff --git a/repmgrd.c b/repmgrd.c index 16cf2a89..b5b3f04a 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -298,9 +298,12 @@ main(int argc, char **argv) */ do { + /* Timer for repl_nodes synchronisation interval */ + int sync_repl_nodes_elapsed = 0; + /* * Set my server mode, establish a connection to master and start - * monitor + * monitoring */ switch (node_info.type) @@ -472,6 +475,24 @@ main(int argc, char **argv) sleep(local_options.monitor_interval_secs); + /* + * On a witness node, regularly resync the repl_nodes table + * to keep up with any changes on the primary + * + * TODO: only resync the table if changes actually detected + */ + if (node_info.type == WITNESS) + { + sync_repl_nodes_elapsed += local_options.monitor_interval_secs; + log_debug(_("%i - %i \n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs); + if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs) + { + log_debug(_("Resyncing repl_nodes table\n")); + copy_configuration(master_conn, my_local_conn, local_options.cluster_name); + sync_repl_nodes_elapsed = 0; + } + } + if (got_SIGHUP) { /* @@ -486,6 +507,7 @@ main(int argc, char **argv) } got_SIGHUP = false; } + if (failover_done) { log_debug(_("standby check loop will terminate\n")); @@ -1954,6 +1976,8 @@ check_node_configuration(void) /* Adding the node */ log_info(_("adding node %d to cluster '%s'\n"), local_options.node, local_options.cluster_name); + + /* XXX 
use create_node_record() */ sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes" " (id, cluster, name, conninfo, priority, witness) "