diff --git a/HISTORY b/HISTORY index c70ae831..4d43e290 100644 --- a/HISTORY +++ b/HISTORY @@ -9,6 +9,9 @@ repmgr: before cloning with pg_basebackup, check that sufficient free walsenders are available (Ian) improve "repmgr-auto" Debian package (Gianni) + repmgr: add option `--wait-sync` for `standby register` which causes + repmgr to wait for the registered node record to synchronise to + the standby 3.1.5 2016-08-15 repmgrd: in a failover situation, prevent endless looping when diff --git a/README.md b/README.md index 366e4002..cbe71ce4 100644 --- a/README.md +++ b/README.md @@ -1587,23 +1587,25 @@ which contains connection details for the local database. `repmgr` or `repmgrd` will return one of the following error codes on program exit: -* SUCCESS (0) Program ran successfully. -* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid -* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error (repmgr only) -* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed -* ERR_DB_CON (6) Error when trying to connect to a database -* ERR_DB_QUERY (7) Error while executing a database query -* ERR_PROMOTED (8) Exiting program because the node has been promoted to master -* ERR_STR_OVERFLOW (10) String overflow error -* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only) -* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only) -* ERR_SYS_FAILURE (13) Error when forking (repmgrd only) -* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only) -* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only) -* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only) -* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only) -* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only) - +* SUCCESS (0) Program ran successfully. 
+* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid +* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error + (repmgr only) +* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed +* ERR_DB_CON (6) Error when trying to connect to a database +* ERR_DB_QUERY (7) Error while executing a database query +* ERR_PROMOTED (8) Exiting program because the node has been promoted to master +* ERR_STR_OVERFLOW (10) String overflow error +* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only) +* ERR_BAD_SSH (12) Error when connecting to remote host via SSH (repmgr only) +* ERR_SYS_FAILURE (13) Error when forking (repmgrd only) +* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup (repmgr only) +* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only) +* ERR_BAD_BACKUP_LABEL (17) Corrupt or unreadable backup label encountered (repmgr only) +* ERR_SWITCHOVER_FAIL (18) Error encountered during switchover (repmgr only) +* ERR_BARMAN (19) Unrecoverable error while accessing the barman server (repmgr only) +* ERR_REGISTRATION_SYNC (20) After registering a standby, local node record was not + synchronised (repmgr only, with --wait-sync option) Support and Assistance ---------------------- diff --git a/errcode.h b/errcode.h index 45c43c77..783501f8 100644 --- a/errcode.h +++ b/errcode.h @@ -39,5 +39,7 @@ #define ERR_BAD_BACKUP_LABEL 17 #define ERR_SWITCHOVER_FAIL 18 #define ERR_BARMAN 19 +#define ERR_REGISTRATION_SYNC 20 + #endif /* _ERRCODE_H_ */ diff --git a/repmgr.c b/repmgr.c index 62cc0faa..4efcfdae 100644 --- a/repmgr.c +++ b/repmgr.c @@ -227,6 +227,7 @@ main(int argc, char **argv) {"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN}, {"no-upstream-connection", no_argument, NULL, OPT_NO_UPSTREAM_CONNECTION}, {"copy-external-config-files", optional_argument, NULL, OPT_COPY_EXTERNAL_CONFIG_FILES}, + {"wait-sync", optional_argument, NULL, OPT_REGISTER_WAIT}, 
{"version", no_argument, NULL, 'V'}, /* Following options deprecated */ {"local-port", required_argument, NULL, 'l'}, @@ -524,7 +525,13 @@ main(int argc, char **argv) case OPT_NO_UPSTREAM_CONNECTION: runtime_options.no_upstream_connection = true; break; - + case OPT_REGISTER_WAIT: + runtime_options.wait_register_sync = true; + if (optarg != NULL) + { + runtime_options.wait_register_sync_seconds = repmgr_atoi(optarg, "--wait-sync", &cli_errors, false); + } + break; /* deprecated options - output a warning */ case 'l': /* -l/--local-port is deprecated */ @@ -1441,6 +1448,94 @@ do_standby_register(void) true, NULL); + /* if --wait-sync option set, wait for the records to synchronise */ + + if (runtime_options.wait_register_sync) + { + bool sync_ok = false; + int timer = 0; + int node_record_result; + t_node_info node_record_on_master = T_NODE_INFO_INITIALIZER; + t_node_info node_record_on_standby = T_NODE_INFO_INITIALIZER; + + node_record_result = get_node_record(master_conn, + options.cluster_name, + options.node, + &node_record_on_master); + + if (node_record_result != 1) + { + log_err(_("unable to retrieve node record from master\n")); + PQfinish(master_conn); + PQfinish(conn); + exit(ERR_REGISTRATION_SYNC); + } + + for (;;) + { + bool records_match = true; + + if (runtime_options.wait_register_sync_seconds && runtime_options.wait_register_sync_seconds == timer) + break; + + // XXX check result + node_record_result = get_node_record(conn, + options.cluster_name, + options.node, + &node_record_on_standby); + + if (node_record_result == 0) + { + /* no record available yet on standby*/ + records_match = false; + } + else if (node_record_result == 1) + { + /* compare relevant fields */ + if (node_record_on_standby.upstream_node_id != node_record_on_master.upstream_node_id) + records_match = false; + + if (node_record_on_standby.type != node_record_on_master.type) + records_match = false; + + if (node_record_on_standby.priority != node_record_on_master.priority) + 
records_match = false; + + if (node_record_on_standby.active != node_record_on_master.active) + records_match = false; + + if (strcmp(node_record_on_standby.name, node_record_on_master.name) != 0) + records_match = false; + + if (strcmp(node_record_on_standby.conninfo_str, node_record_on_master.conninfo_str) != 0) + records_match = false; + + if (strcmp(node_record_on_standby.slot_name, node_record_on_master.slot_name) != 0) + records_match = false; + + if (records_match == true) + { + sync_ok = true; + break; + } + } + + sleep(1); + timer ++; + } + + if (sync_ok == false) + { + log_err(_("node record was not synchronised after %i seconds\n"), + runtime_options.wait_register_sync_seconds); + PQfinish(master_conn); + PQfinish(conn); + exit(ERR_REGISTRATION_SYNC); + } + + log_info(_("node record on standby synchronised from master\n")); + } + PQfinish(master_conn); PQfinish(conn); @@ -6125,7 +6220,16 @@ check_parameters_for_action(const int action) } } - /* Warn about parameters which apply to STANDBY SWITCHOVER only */ + /* Warn about parameters which apply to STANDBY REGISTER only */ + if (action != STANDBY_REGISTER) + { + if (runtime_options.wait_register_sync) + { + item_list_append(&cli_warnings, _("--wait-sync can only be used when executing STANDBY REGISTER")); + } + } + + /* Warn about parameters which apply to STANDBY SWITCHOVER only */ if (action != STANDBY_SWITCHOVER) { if (pg_rewind_supplied == true) @@ -6134,6 +6238,7 @@ check_parameters_for_action(const int action) } } + /* Warn about parameters which apply to WITNESS UNREGISTER only */ if (action != WITNESS_UNREGISTER) { if (runtime_options.node) @@ -6142,7 +6247,7 @@ check_parameters_for_action(const int action) } } - /* Warn about parameters which apply to CLUSTER SHOW only */ + /* Warn about parameters which apply to CLUSTER SHOW only */ if (action != CLUSTER_SHOW) { if (runtime_options.csv_mode) diff --git a/repmgr.h b/repmgr.h index 68807ece..48c7f85e 100644 --- a/repmgr.h +++ b/repmgr.h @@ -60,6 
+60,7 @@ #define OPT_NODE 9 #define OPT_WITHOUT_BARMAN 10 #define OPT_NO_UPSTREAM_CONNECTION 11 +#define OPT_REGISTER_WAIT 12 /* deprecated command line options */ #define OPT_INITDB_NO_PWPROMPT 999 @@ -93,6 +94,8 @@ typedef struct bool no_upstream_connection; bool copy_external_config_files; int copy_external_config_files_destination; + bool wait_register_sync; + int wait_register_sync_seconds; char masterport[MAXLEN]; /* * configuration file parameters which can be overridden on the @@ -116,7 +119,7 @@ typedef struct char recovery_min_apply_delay[MAXLEN]; } t_runtime_options; -#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, false, false, CONFIG_FILE_SAMEPATH, "", "", "", "", "fast", "", 0, 0, "", ""} +#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, false, false, false, CONFIG_FILE_SAMEPATH, false, 0, "", "", "", "", "fast", "", 0, 0, "", ""} struct BackupLabel {