repmgr: add option --wait-sync for standby register

Causes repmgr to wait for the updated node record to propagate
to the standby before exiting. This can be used to ensure that
actions which depend on the standby's node record being synchronised
(such as starting repmgrd) are not carried out prematurely.

Addresses GitHub #103
This commit is contained in:
Ian Barwick
2016-09-28 14:04:59 +09:00
parent b7f20ee1f7
commit fa10fd8493
5 changed files with 136 additions and 21 deletions

View File

@@ -9,6 +9,9 @@
repmgr: before cloning with pg_basebackup, check that sufficient free
walsenders are available (Ian)
improve "repmgr-auto" Debian package (Gianni)
repmgr: add option `--wait-sync` for `standby register` which causes
repmgr to wait for the registered node record to synchronise to
the standby
3.1.5 2016-08-15
repmgrd: in a failover situation, prevent endless looping when

View File

@@ -1587,23 +1587,25 @@ which contains connection details for the local database.
`repmgr` or `repmgrd` will return one of the following error codes on program
exit:
* SUCCESS (0) Program ran successfully.
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error (repmgr only)
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
* ERR_DB_CON (6) Error when trying to connect to a database
* ERR_DB_QUERY (7) Error while executing a database query
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
* ERR_STR_OVERFLOW (10) String overflow error
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only)
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only)
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only)
* SUCCESS (0) Program ran successfully.
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
(repmgr only)
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
* ERR_DB_CON (6) Error when trying to connect to a database
* ERR_DB_QUERY (7) Error while executing a database query
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
* ERR_STR_OVERFLOW (10) String overflow error
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only)
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only)
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only)
* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only)
* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only)
* ERR_REGISTRATION_SYNC (20) After registering a standby, local node record was not
synchronised (repmgr only, with --wait-sync option)
Support and Assistance
----------------------

View File

@@ -39,5 +39,7 @@
#define ERR_BAD_BACKUP_LABEL 17
#define ERR_SWITCHOVER_FAIL 18
#define ERR_BARMAN 19
#define ERR_REGISTRATION_SYNC 20
#endif /* _ERRCODE_H_ */

111
repmgr.c
View File

@@ -227,6 +227,7 @@ main(int argc, char **argv)
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
{"no-upstream-connection", no_argument, NULL, OPT_NO_UPSTREAM_CONNECTION},
{"copy-external-config-files", optional_argument, NULL, OPT_COPY_EXTERNAL_CONFIG_FILES},
{"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT},
{"version", no_argument, NULL, 'V'},
/* Following options deprecated */
{"local-port", required_argument, NULL, 'l'},
@@ -524,7 +525,13 @@ main(int argc, char **argv)
case OPT_NO_UPSTREAM_CONNECTION:
runtime_options.no_upstream_connection = true;
break;
case OPT_REGISTER_WAIT:
runtime_options.wait_register_sync = true;
if (optarg != NULL)
{
runtime_options.wait_register_sync_seconds = repmgr_atoi(optarg, "--wait-sync", &cli_errors, false);
}
break;
/* deprecated options - output a warning */
case 'l':
/* -l/--local-port is deprecated */
@@ -1441,6 +1448,94 @@ do_standby_register(void)
true,
NULL);
/* if --wait-sync option set, wait for the records to synchronise */
if (runtime_options.wait_register_sync)
{
bool sync_ok = false;
int timer = 0;
int node_record_result;
t_node_info node_record_on_master = T_NODE_INFO_INITIALIZER;
t_node_info node_record_on_standby = T_NODE_INFO_INITIALIZER;
node_record_result = get_node_record(master_conn,
options.cluster_name,
options.node,
&node_record_on_master);
if (node_record_result != 1)
{
log_err(_("unable to retrieve node record from master\n"));
PQfinish(master_conn);
PQfinish(conn);
exit(ERR_REGISTRATION_SYNC);
}
for (;;)
{
bool records_match = true;
if (runtime_options.wait_register_sync_seconds && runtime_options.wait_register_sync_seconds == timer)
break;
// XXX check result
node_record_result = get_node_record(conn,
options.cluster_name,
options.node,
&node_record_on_standby);
if (node_record_result == 0)
{
/* no record available yet on standby*/
records_match = false;
}
else if (node_record_result == 1)
{
/* compare relevant fields */
if (node_record_on_standby.upstream_node_id != node_record_on_master.upstream_node_id)
records_match = false;
if (node_record_on_standby.type != node_record_on_master.type)
records_match = false;
if (node_record_on_standby.priority != node_record_on_master.priority)
records_match = false;
if (node_record_on_standby.active != node_record_on_master.active)
records_match = false;
if (strcmp(node_record_on_standby.name, node_record_on_master.name) != 0)
records_match = false;
if (strcmp(node_record_on_standby.conninfo_str, node_record_on_master.conninfo_str) != 0)
records_match = false;
if (strcmp(node_record_on_standby.slot_name, node_record_on_master.slot_name) != 0)
records_match = false;
if (records_match == true)
{
sync_ok = true;
break;
}
}
sleep(1);
timer ++;
}
if (sync_ok == false)
{
log_err(_("node record was not synchronised after %i seconds\n"),
runtime_options.wait_register_sync_seconds);
PQfinish(master_conn);
PQfinish(conn);
exit(ERR_REGISTRATION_SYNC);
}
log_info(_("node record on standby synchronised from master\n"));
}
PQfinish(master_conn);
PQfinish(conn);
@@ -6125,7 +6220,16 @@ check_parameters_for_action(const int action)
}
}
/* Warn about parameters which apply to STANDBY SWITCHOVER only */
/* Warn about parameters which apply to STANDBY REGISTER only */
if (action != STANDBY_REGISTER)
{
if (runtime_options.wait_register_sync)
{
item_list_append(&cli_warnings, _("--wait-sync can only be used when executing STANDBY REGISTER"));
}
}
/* Warn about parameters which apply to STANDBY SWITCHOVER only */
if (action != STANDBY_SWITCHOVER)
{
if (pg_rewind_supplied == true)
@@ -6134,6 +6238,7 @@ check_parameters_for_action(const int action)
}
}
/* Warn about parameters which apply to WITNESS UNREGISTER only */
if (action != WITNESS_UNREGISTER)
{
if (runtime_options.node)
@@ -6142,7 +6247,7 @@ check_parameters_for_action(const int action)
}
}
/* Warn about parameters which apply to CLUSTER SHOW only */
/* Warn about parameters which apply to CLUSTER SHOW only */
if (action != CLUSTER_SHOW)
{
if (runtime_options.csv_mode)

View File

@@ -60,6 +60,7 @@
#define OPT_NODE 9
#define OPT_WITHOUT_BARMAN 10
#define OPT_NO_UPSTREAM_CONNECTION 11
#define OPT_REGISTER_WAIT 12
/* deprecated command line options */
#define OPT_INITDB_NO_PWPROMPT 999
@@ -93,6 +94,8 @@ typedef struct
bool no_upstream_connection;
bool copy_external_config_files;
int copy_external_config_files_destination;
bool wait_register_sync;
int wait_register_sync_seconds;
char masterport[MAXLEN];
/*
* configuration file parameters which can be overridden on the
@@ -116,7 +119,7 @@ typedef struct
char recovery_min_apply_delay[MAXLEN];
} t_runtime_options;
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, false, false, CONFIG_FILE_SAMEPATH, "", "", "", "", "fast", "", 0, 0, "", ""}
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, false, false, CONFIG_FILE_SAMEPATH, false, 0, "", "", "", "", "fast", "", 0, 0, "", ""}
struct BackupLabel
{