diff --git a/doc/repmgr-standby-register.sgml b/doc/repmgr-standby-register.sgml index e4538b48..5288f7b0 100644 --- a/doc/repmgr-standby-register.sgml +++ b/doc/repmgr-standby-register.sgml @@ -37,7 +37,24 @@ - + + Waiting for the standby to start + + By default, &repmgr; will wait 30 seconds for the standby to become available before + aborting with a connection error. This is useful when setting up a standby from a script, + as the standby may not have fully started up by the time repmgr standby register + is executed. + + + To change the timeout, pass the desired value with the --wait-start option. + A value of 0 will disable waiting, so only the initial connection attempt is made. + + + The timeout will be ignored if -F/--force was provided. + + + + Waiting for the registration to propagate to the standby Depending on your environment and workload, it may take some time for @@ -75,11 +92,11 @@ - Event notifications - A standby_register event notification will be generated. + A standby_register event notification + will be generated. 
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 9b5cac24..1f6213dd 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -783,9 +783,52 @@ do_standby_register(void) conn = establish_db_connection_quiet(config_file_options.conninfo); + /* + * if --force provided, don't wait for the node to start, as the + * normal use case will be re-registering an existing node, or + * registering an inactive/not-yet-extant one; we'll do the + * error handling for those cases in the next code block + */ + if (PQstatus(conn) != CONNECTION_OK && runtime_options.force == false) + { + bool conn_ok = false; + int timer = 0; + + for (;;) + { + if (timer == runtime_options.wait_start) + break; + + sleep(1); + + log_verbose(LOG_INFO, _("%i of %i connection attempts"), + timer + 1, + runtime_options.wait_start); + + conn = establish_db_connection_quiet(config_file_options.conninfo); + + if (PQstatus(conn) == CONNECTION_OK) + { + conn_ok = true; + break; + } + + timer++; + } + + if (conn_ok == true) + { + log_info(_("connected to local node \"%s\" (ID: %i) after %i seconds"), + config_file_options.node_name, + config_file_options.node_id, + timer); + } + + } + if (PQstatus(conn) != CONNECTION_OK) { - if (!runtime_options.force) + if (runtime_options.force == false) { log_error(_("unable to connect to local node \"%s\" (ID: %i)"), config_file_options.node_name, @@ -797,7 +840,7 @@ do_standby_register(void) exit(ERR_BAD_CONFIG); } - if (!runtime_options.connection_param_provided) + if (runtime_options.connection_param_provided == false) { log_error(_("unable to connect to local node \"%s\" (ID: %i) and no primary connection parameters provided"), config_file_options.node_name, @@ -821,8 +864,8 @@ do_standby_register(void) } /* - * User is forcing a registration and must have supplied primary - * connection info + * otherwise user is forcing a registration of a (potentially) inactive (or + * not-yet-extant) node and must have supplied primary connection info 
*/ else { @@ -5313,6 +5356,8 @@ do_standby_help(void) printf(_(" -F, --force overwrite an existing node record, or if primary connection\n" \ " parameters supplied, create record even if standby offline\n")); printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional)\n")); + printf(_(" --wait-start=VALUE wait for the standby to start (timeout in seconds, default %i)\n"), DEFAULT_WAIT_START); + printf(_(" --wait-sync[=VALUE] wait for the node record to synchronise to the standby\n" \ " (optional timeout in seconds)\n")); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 276bfc14..c254a6ec 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -86,6 +86,7 @@ typedef struct /* "standby register" options */ bool wait_register_sync; int wait_register_sync_seconds; + int wait_start; /* "standby switchover" options */ bool always_promote; @@ -146,7 +147,7 @@ typedef struct /* "standby clone"/"standby follow" options */ \ NO_UPSTREAM_NODE, \ /* "standby register" options */ \ - false, 0, \ + false, 0, DEFAULT_WAIT_START, \ /* "standby switchover" options */ \ false, false, false, \ /* "node status" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index a4a787e7..d722204c 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -389,7 +389,11 @@ main(int argc, char **argv) *--------------------------- */ - case OPT_REGISTER_WAIT: + case OPT_WAIT_START: + runtime_options.wait_start = repmgr_atoi(optarg, "--wait-start", &cli_errors, false); + break; + + case OPT_WAIT_SYNC: runtime_options.wait_register_sync = true; if (optarg != NULL) { diff --git a/repmgr-client.h b/repmgr-client.h index 21f1bbdd..cc901ee3 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -56,7 +56,7 @@ #define OPT_NODE_NAME 1007 #define OPT_WITHOUT_BARMAN 1008 #define OPT_NO_UPSTREAM_CONNECTION 1009 -#define OPT_REGISTER_WAIT 1010 +#define OPT_WAIT_SYNC 1010 #define OPT_LOG_TO_FILE 1011 #define OPT_UPSTREAM_CONNINFO 1012 #define 
OPT_REPLICATION_USER 1013 @@ -82,6 +82,8 @@ #define OPT_SLOTS 1033 #define OPT_CONFIG_ARCHIVE_DIR 1034 #define OPT_HAS_PASSFILE 1035 +#define OPT_WAIT_START 1036 + /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 #define OPT_NO_CONNINFO_PASSWORD 998 @@ -136,7 +138,8 @@ static struct option long_options[] = {"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN}, /* "standby register" options */ - {"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT}, + {"wait-start", required_argument, NULL, OPT_WAIT_START}, + {"wait-sync", optional_argument, NULL, OPT_WAIT_SYNC}, /* "standby switchover" options * diff --git a/repmgr.h b/repmgr.h index 0bb41ecf..442a3d8f 100644 --- a/repmgr.h +++ b/repmgr.h @@ -76,6 +76,7 @@ #define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */ #define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */ #define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */ +#define DEFAULT_WAIT_START 30 /* seconds */ #ifndef RECOVERY_COMMAND_FILE #define RECOVERY_COMMAND_FILE "recovery.conf"