From 23c99304a69b32c13a52e6b4cd3f3d5fcecb1400 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 3 Apr 2018 09:13:32 +0900 Subject: [PATCH] "node rejoin": actively check for node to rejoin cluster Previously repmgr was relying on whatever command was configured to start PostgreSQL to determine whether the node being rejoined had started correctly. However it's preferable to actively poll the upstream to confirm it has restarted and actually attached as a standby before confirming success of the "node rejoin" action. This can be overridden with the -W/--no-wait option. (Note that for consistency with other PostgreSQL utilities, the short form of the --wait option is now "-w"; this is currently only used in "repmgr standby follow".) Also update "repmgr node rejoin" documentation with a list of supported options, and add some useful index entries for "pg_rewind". Implements GitHub #415. --- doc/repmgr-node-rejoin.sgml | 80 ++++++++++++++++++++++- doc/repmgr-standby-follow.sgml | 2 +- doc/switchover.sgml | 6 +- errcode.h | 1 + repmgr-action-node.c | 112 +++++++++++++++++++++++++++++---- repmgr-client-global.h | 3 +- repmgr-client.c | 46 +++++++++++++- repmgr-client.h | 4 +- 8 files changed, 234 insertions(+), 20 deletions(-) diff --git a/doc/repmgr-node-rejoin.sgml b/doc/repmgr-node-rejoin.sgml index d7583e32..f8015ce1 100644 --- a/doc/repmgr-node-rejoin.sgml +++ b/doc/repmgr-node-rejoin.sgml @@ -45,6 +45,77 @@ + + + Options + + + + + + + Check prerequisites but don't actually execute the rejoin. + + + + + + + + + Execute pg_rewind if necessary. + + + It is only necessary to provide the pg_rewind + if using PostgreSQL 9.3 or 9.4, and pg_rewind + is not installed in the PostgreSQL bin directory. + + + + + + + + + comma-separated list of configuration files to retain after + executing pg_rewind. + + + Currently pg_rewind will overwrite + the local node's configuration files with the files from the source node, + so it's advisable to use this option to ensure they are kept. + + + + + + + + + + Directory to temporarily store configuration files specified with + ; default: /tmp. + + + + + + + + + + Don't wait for the node to rejoin cluster. + + + If this option is supplied, &repmgr; will restart the node but + not wait for it to connect to the primary. + + + + + + + Event notifications @@ -77,11 +148,18 @@ + + + pg_rewind + using with "repmgr node rejoin" + + Using <command>pg_rewind</command> repmgr node rejoin can optionally use pg_rewind to re-integrate a node which has diverged from the rest of the cluster, typically a failed primary. - pg_rewind is available in PostgreSQL 9.5 and later. + pg_rewind is available in PostgreSQL 9.5 and later as part of the core distribution, + and can be installed from external sources for PostgreSQL 9.3 and 9.4. diff --git a/doc/repmgr-standby-follow.sgml b/doc/repmgr-standby-follow.sgml index 4a98b398..34b67c70 100644 --- a/doc/repmgr-standby-follow.sgml +++ b/doc/repmgr-standby-follow.sgml @@ -71,7 +71,7 @@ - + diff --git a/doc/switchover.sgml b/doc/switchover.sgml index 74109e13..00e3d54a 100644 --- a/doc/switchover.sgml +++ b/doc/switchover.sgml @@ -180,6 +180,10 @@ + + pg_rewind + using with "repmgr standby switchover" + Switchover and pg_rewind If the demotion candidate does not shut down smoothly or cleanly, there's a risk it @@ -206,7 +210,7 @@ pg_rewind has been part of the core PostgreSQL distribution since - version 9.5. Users of versions 9.3 and 9.4 will need to manually install; source code available here: + version 9.5. Users of versions 9.3 and 9.4 will need to manually install it; the source code is available here: https://github.com/vmware/pg_rewind. If the pg_rewind binary is not installed in the PostgreSQL bin directory, provide diff --git a/errcode.h b/errcode.h index 9e4fd158..5cf39b7d 100644 --- a/errcode.h +++ b/errcode.h @@ -45,5 +45,6 @@ #define ERR_OUT_OF_MEMORY 21 #define ERR_SWITCHOVER_INCOMPLETE 22 #define ERR_FOLLOW_FAIL 23 +#define ERR_REJOIN_FAIL 24 #endif /* _ERRCODE_H_ */ diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 1defef5e..d079c341 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -1758,7 +1758,17 @@ do_node_rejoin(void) PQfinish(upstream_conn); /* connect to registered primary and check it's not in recovery */ - upstream_conn = establish_db_connection(primary_node_record.conninfo, true); + upstream_conn = establish_db_connection(primary_node_record.conninfo, false); + + if (PQstatus(upstream_conn) != CONNECTION_OK) + { + log_error(_("unable to connect to current primary \"%s\" (node ID: %i)"), + primary_node_record.node_name, + primary_node_record.node_id); + log_detail(_("primay node conninfo is: \"%s\""), + primary_node_record.conninfo); + exit(ERR_BAD_CONFIG); + } upstream_recovery_type = get_recovery_type(upstream_conn); @@ -1992,22 +2002,99 @@ do_node_rejoin(void) } /* - * XXX add checks that node actually started and connected to primary, - * if not exit with ERR_REJOIN_FAIL + * Actively check that node actually started and connected to primary, + * if not exit with ERR_REJOIN_FAIL. + * + * This check can be overridden with -W/--no-wait, in which case a one-time + * check will be carried out. */ + if (runtime_options.no_wait == false) + { + int i; - create_event_notification(upstream_conn, - &config_file_options, - config_file_options.node_id, - "node_rejoin", - success, - follow_output.data); + for (i = 0; i < config_file_options.standby_reconnect_timeout; i++) + { + if (is_server_available(config_file_options.conninfo)) + { + log_verbose(LOG_INFO, _("demoted primary is pingable")); + break; + } - PQfinish(upstream_conn); + if (i % 5 == 0) + { + log_verbose(LOG_INFO, _("waiting for node %i to respond to pings; %i of max %i attempts"), + config_file_options.node_id, + i + 1, config_file_options.standby_reconnect_timeout); + } + else + { + log_debug("sleeping 1 second waiting for node %i to respond to pings; %i of max %i attempts", + config_file_options.node_id, + i + 1, config_file_options.standby_reconnect_timeout); + } - log_notice(_("NODE REJOIN successful")); - log_detail("%s", follow_output.data); + sleep(1); + } + for (; i < config_file_options.standby_reconnect_timeout; i++) + { + success = is_downstream_node_attached(upstream_conn, config_file_options.node_name); + + if (success == true) + { + log_verbose(LOG_INFO, _("node %i has attached to its upstream node"), + config_file_options.node_id); + break; + } + + if (i % 5 == 0) + { + log_info(_("waiting for node %i to connect to new primary; %i of max %i attempts"), + config_file_options.node_id, + i + 1, config_file_options.standby_reconnect_timeout); + } + else + { + log_debug("sleeping 1 second waiting for node %i to connect to new primary; %i of max %i attempts", + config_file_options.node_id, + i + 1, config_file_options.standby_reconnect_timeout); + } + + sleep(1); + } + + create_event_notification(upstream_conn, + &config_file_options, + config_file_options.node_id, + "node_rejoin", + success, + follow_output.data); + + if (success == false) + { + termPQExpBuffer(&follow_output); + log_notice(_("NODE REJOIN failed")); + exit(ERR_REJOIN_FAIL); + } + } + else + { + success = is_downstream_node_attached(upstream_conn, config_file_options.node_name); + } + + + if (success == true) + { + log_notice(_("NODE REJOIN successful")); + log_detail("%s", follow_output.data); + } + else + { + /* + * if we reach here, no record found in upstream node's pg_stat_replication */ + log_notice(_("NODE REJOIN has completed but node is not yet reattached to upstream")); + log_hint(_("you will need to manually check the node's replication status")); + } termPQExpBuffer(&follow_output); return; @@ -2474,6 +2561,7 @@ do_node_help(void) " after executing \"pg_rewind\"\n")); printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \ " (default: /tmp)\n")); + printf(_(" -W/--no-wait don't wait for the node to rejoin cluster\n")); puts(""); printf(_("NODE SERVICE\n")); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 8814fff5..cf07c931 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -42,6 +42,7 @@ typedef struct bool force; char pg_bindir[MAXLEN]; /* overrides setting in repmgr.conf */ bool wait; + bool no_wait; /* logging options */ char log_level[MAXLEN]; /* overrides setting in repmgr.conf */ @@ -134,7 +135,7 @@ typedef struct /* configuration metadata */ \ false, false, false, false, \ /* general configuration options */ \ - "", false, false, "", false, \ + "", false, false, "", false, false, \ /* logging options */ \ "", false, false, false, \ /* output options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index aaff7c0b..454673e5 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -178,7 +178,7 @@ main(int argc, char **argv) strncpy(runtime_options.username, pw->pw_name, MAXLEN); } - while ((c = getopt_long(argc, argv, "?Vb:f:FWd:h:p:U:R:S:D:ck:L:tvC:", long_options, + while ((c = getopt_long(argc, argv, "?Vb:f:FwWd:h:p:U:R:S:D:ck:L:tvC:", long_options, &optindex)) != -1) { /* @@ -243,11 +243,16 @@ main(int argc, char **argv) strncpy(runtime_options.replication_user, optarg, MAXLEN); break; - /* -W/--wait */ - case 'W': + /* -w/--wait */ + case 'w': runtime_options.wait = true; break; + /* -W/--no-wait */ + case 'W': + runtime_options.no_wait = true; + break; + /*---------------------------- * database connection options *---------------------------- @@ -1571,6 +1576,41 @@ check_cli_parameters(const int action) } } + /* --wait/--no-wait */ + + if (runtime_options.wait == true && runtime_options.no_wait == true) + { + item_list_append_format(&cli_errors, + _("both --wait and --no-wait options provided")); + } + else + { + if (runtime_options.wait) + { + switch (action) + { + case STANDBY_FOLLOW: + break; + default: + item_list_append_format(&cli_warnings, + _("--wait will be ignored when executing %s"), + action_name(action)); + } + } + else if (runtime_options.wait) + { + switch (action) + { + case NODE_REJOIN: + break; + default: + item_list_append_format(&cli_warnings, + _("--no-wait will be ignored when executing %s"), + action_name(action)); + } + } + } + /* repmgr node service --action */ if (runtime_options.action[0] != '\0') { diff --git a/repmgr-client.h b/repmgr-client.h index b3ad5518..f8bcc3b7 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -86,6 +86,7 @@ #define OPT_REPL_CONN 1037 #define OPT_REMOTE_NODE_ID 1038 #define OPT_RECOVERY_CONF_ONLY 1039 +#define OPT_NO_WAIT 1040 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 @@ -104,7 +105,8 @@ static struct option long_options[] = {"dry-run", no_argument, NULL, OPT_DRY_RUN}, {"force", no_argument, NULL, 'F'}, {"pg_bindir", required_argument, NULL, 'b'}, - {"wait", no_argument, NULL, 'W'}, + {"wait", no_argument, NULL, 'w'}, + {"no-wait", no_argument, NULL, 'W'}, /* connection options */ {"dbname", required_argument, NULL, 'd'},