"repmgr standby register": add --wait-start option

Implements GitHub #356.
This commit is contained in:
Ian Barwick
2018-01-04 12:48:12 +09:00
parent 832b38c5cb
commit b705127a34
6 changed files with 82 additions and 11 deletions

View File

@@ -37,7 +37,24 @@
</note>
</refsect1>
<refsect1 id="repmgr-standby-register-wait" xreflabel="repmgr standby register --wait">
<refsect1 id="repmgr-standby-register-wait-start" xreflabel="repmgr standby register --wait-start">
<title>Waiting for the standby to start</title>
<para>
By default, &repmgr; will wait 30 seconds for the standby to become available before
aborting with a connection error. This is useful when setting up a standby from a script,
as the standby may not have fully started up by the time <command>repmgr standby register</command>
is executed.
</para>
<para>
To change the timeout, pass the desired value with the <literal>--wait-start</literal> option.
A value of <literal>0</literal> disables waiting altogether: &repmgr; will not retry the connection.
</para>
<para>
The timeout will be ignored if <literal>-F/--force</literal> was provided.
</para>
</refsect1>
<refsect1 id="repmgr-standby-register-wait-sync" xreflabel="repmgr standby register --wait-sync">
<title>Waiting for the registration to propagate to the standby</title>
<para>
Depending on your environment and workload, it may take some time for
@@ -75,11 +92,11 @@
</para>
</refsect1>
<refsect1>
<title>Event notifications</title>
<para>
A <literal>standby_register</literal> event notification will be generated.
A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
will be generated.
</para>
</refsect1>

View File

@@ -783,9 +783,52 @@ do_standby_register(void)
conn = establish_db_connection_quiet(config_file_options.conninfo);
/*
* if --force provided, don't wait for the node to start, as the
* normal use case will be re-registering an existing node, or
* registering an inactive/not-yet-extant one; we'll do the
* error handling for those cases in the next code block
*/
if (PQstatus(conn) != CONNECTION_OK && runtime_options.force == false)
{
bool conn_ok = false;
int timer = 0;
for (;;)
{
if (timer == runtime_options.wait_start)
break;
sleep(1);
log_verbose(LOG_INFO, _("%i of %i connection attempts"),
timer + 1,
runtime_options.wait_start);
conn = establish_db_connection_quiet(config_file_options.conninfo);
if (PQstatus(conn) == CONNECTION_OK)
{
conn_ok = true;
break;
}
timer++;
}
if (conn_ok == true)
{
log_info(_("connected to local node \"%s\" (ID: %i) after %i seconds"),
config_file_options.node_name,
config_file_options.node_id,
timer);
}
}
if (PQstatus(conn) != CONNECTION_OK)
{
if (!runtime_options.force)
if (runtime_options.force == false)
{
log_error(_("unable to connect to local node \"%s\" (ID: %i)"),
config_file_options.node_name,
@@ -797,7 +840,7 @@ do_standby_register(void)
exit(ERR_BAD_CONFIG);
}
if (!runtime_options.connection_param_provided)
if (runtime_options.connection_param_provided == false)
{
log_error(_("unable to connect to local node \"%s\" (ID: %i) and no primary connection parameters provided"),
config_file_options.node_name,
@@ -821,8 +864,8 @@ do_standby_register(void)
}
/*
* User is forcing a registration and must have supplied primary
* connection info
* otherwise user is forcing a registration of a (potentially) inactive (or
* not-yet-extant) node and must have supplied primary connection info
*/
else
{
@@ -5313,6 +5356,8 @@ do_standby_help(void)
printf(_(" -F, --force overwrite an existing node record, or if primary connection\n" \
" parameters supplied, create record even if standby offline\n"));
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional)\n"));
printf(_(" --wait-start=VALUE wait for the standby to start (timeout in seconds, default %i)\n"), DEFAULT_WAIT_START);
printf(_(" --wait-sync[=VALUE] wait for the node record to synchronise to the standby\n" \
" (optional timeout in seconds)\n"));

View File

@@ -86,6 +86,7 @@ typedef struct
/* "standby register" options */
bool wait_register_sync;
int wait_register_sync_seconds;
int wait_start;
/* "standby switchover" options */
bool always_promote;
@@ -146,7 +147,7 @@ typedef struct
/* "standby clone"/"standby follow" options */ \
NO_UPSTREAM_NODE, \
/* "standby register" options */ \
false, 0, \
false, 0, DEFAULT_WAIT_START, \
/* "standby switchover" options */ \
false, false, false, \
/* "node status" options */ \

View File

@@ -389,7 +389,11 @@ main(int argc, char **argv)
*---------------------------
*/
case OPT_REGISTER_WAIT:
case OPT_WAIT_START:
runtime_options.wait_start = repmgr_atoi(optarg, "--wait-start", &cli_errors, false);
break;
case OPT_WAIT_SYNC:
runtime_options.wait_register_sync = true;
if (optarg != NULL)
{

View File

@@ -56,7 +56,7 @@
#define OPT_NODE_NAME 1007
#define OPT_WITHOUT_BARMAN 1008
#define OPT_NO_UPSTREAM_CONNECTION 1009
#define OPT_REGISTER_WAIT 1010
#define OPT_WAIT_SYNC 1010
#define OPT_LOG_TO_FILE 1011
#define OPT_UPSTREAM_CONNINFO 1012
#define OPT_REPLICATION_USER 1013
@@ -82,6 +82,8 @@
#define OPT_SLOTS 1033
#define OPT_CONFIG_ARCHIVE_DIR 1034
#define OPT_HAS_PASSFILE 1035
#define OPT_WAIT_START 1036
/* deprecated since 3.3 */
#define OPT_DATA_DIR 999
#define OPT_NO_CONNINFO_PASSWORD 998
@@ -136,7 +138,8 @@ static struct option long_options[] =
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
/* "standby register" options */
{"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT},
{"wait-start", required_argument, NULL, OPT_WAIT_START},
{"wait-sync", optional_argument, NULL, OPT_WAIT_SYNC},
/* "standby switchover" options
*

View File

@@ -76,6 +76,7 @@
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
#define DEFAULT_WAIT_START 30 /* seconds */
#ifndef RECOVERY_COMMAND_FILE
#define RECOVERY_COMMAND_FILE "recovery.conf"