diff --git a/dbutils.c b/dbutils.c index 6b564959..d576bed6 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1342,67 +1342,6 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info) } -bool -can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason) -{ - bool can_use = true; - - if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) - server_version_num = get_server_version(conn, NULL); - - if (server_version_num < 90500) - { - appendPQExpBuffer(reason, - _("pg_rewind available from PostgreSQL 9.5")); - return false; - } - - if (guc_set(conn, "full_page_writes", "=", "off")) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"full_page_writes\" must be set to \"on\"")); - - can_use = false; - } - - /* - * "wal_log_hints" off - are data checksums available? Note: we're - * checking the local pg_control file here as the value will be the same - * throughout the cluster and saves a round-trip to the demotion - * candidate. 
- */ - if (guc_set(conn, "wal_log_hints", "=", "on") == false) - { - int data_checksum_version = get_data_checksum_version(data_directory); - - if (data_checksum_version < 0) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version")); - can_use = false; - } - else if (data_checksum_version == 0) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled")); - - can_use = false; - } - } - - return can_use; -} - - int get_ready_archive_files(PGconn *conn, const char *data_directory) { diff --git a/dbutils.h b/dbutils.h index 2055637c..93fbf019 100644 --- a/dbutils.h +++ b/dbutils.h @@ -387,7 +387,6 @@ bool get_cluster_size(PGconn *conn, char *size); int get_server_version(PGconn *conn, char *server_version); RecoveryType get_recovery_type(PGconn *conn); int get_primary_node_id(PGconn *conn); -bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); int get_ready_archive_files(PGconn *conn, const char *data_directory); bool identify_system(PGconn *repl_conn, t_system_identification *identification); bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id); diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml index bec28cce..3e1fcf41 100644 --- a/doc/repmgr-standby-switchover.sgml +++ b/doc/repmgr-standby-switchover.sgml @@ -84,11 +84,14 @@ - + Use pg_rewind to reintegrate the old primary if necessary - (PostgreSQL 9.5 and later). + (and the prerequisites for using pg_rewind are met). + If using PostgreSQL 9.3 or 9.4, and the pg_rewind + binary is not installed in the PostgreSQL bin directory, + provide its full path. For more details see also . 
diff --git a/doc/switchover.sgml b/doc/switchover.sgml index f34c5416..74109e13 100644 --- a/doc/switchover.sgml +++ b/doc/switchover.sgml @@ -179,6 +179,46 @@ + + Switchover and pg_rewind + + If the demotion candidate does not shut down smoothly or cleanly, there's a risk it + will have a slightly divergent timeline and will not be able to attach to the new + primary. To fix this situation without needing to reclone the old primary, it's + possible to use the pg_rewind utility, which will usually be + able to resync the two servers. + + + To have &repmgr; execute pg_rewind if it detects this + situation after promoting the new primary, add the + --force-rewind option. + + + + If &repmgr; detects a situation where it needs to execute pg_rewind, + it will execute a CHECKPOINT on the new primary before executing + pg_rewind. + + + + For more details on pg_rewind, see: + https://www.postgresql.org/docs/current/static/app-pgrewind.html. + + + pg_rewind has been part of the core PostgreSQL distribution since + version 9.5. Users of versions 9.3 and 9.4 will need to install it manually; the source code is available here: + https://github.com/vmware/pg_rewind. + If the pg_rewind + binary is not installed in the PostgreSQL bin directory, provide + its full path on the demotion candidate with --force-rewind=/path/to/pg_rewind. + + + Note that building the 9.3/9.4 version of pg_rewind requires the PostgreSQL + source code. Also, PostgreSQL 9.3 does not provide wal_log_hints, + meaning data checksums must have been enabled when the database was initialized. 
+ + + diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 16b50e37..1defef5e 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -1728,7 +1728,7 @@ do_node_rejoin(void) { log_error(_("database is not shut down cleanly")); - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { log_detail(_("pg_rewind will not be able to run")); } @@ -1779,7 +1779,7 @@ do_node_rejoin(void) * archived */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { PQExpBufferData reason; PQExpBufferData msg; @@ -1816,7 +1816,7 @@ do_node_rejoin(void) * Forcibly rewind node if requested (this is mainly for use when this * action is being executed by "repmgr standby switchover") */ - if (runtime_options.force_rewind == true && runtime_options.dry_run == false) + if (runtime_options.force_rewind_used == true && runtime_options.dry_run == false) { int ret; PQExpBufferData filebuf; @@ -1826,9 +1826,18 @@ do_node_rejoin(void) /* execute pg_rewind */ initPQExpBuffer(&command); - appendPQExpBuffer(&command, - "%s -D ", - make_pg_path("pg_rewind")); + if (runtime_options.force_rewind_path[0] != '\0') + { + appendPQExpBuffer(&command, + "%s -D ", + runtime_options.force_rewind_path); + } + else + { + appendPQExpBuffer(&command, + "%s -D ", + make_pg_path("pg_rewind")); + } appendShellString(&command, config_file_options.data_directory); @@ -2438,15 +2447,15 @@ do_node_help(void) puts(""); printf(_(" Configuration file required, runs on local node only.\n")); puts(""); - printf(_(" --csv emit output as CSV\n")); - printf(_(" --nagios emit output in Nagios format (individual status output only)\n")); + printf(_(" --csv emit output as CSV\n")); + printf(_(" --nagios emit output in Nagios format (individual status output only)\n")); puts(""); printf(_(" Following options check an individual status:\n")); - printf(_(" --archive-ready number of WAL files ready for archiving\n")); - printf(_(" --downstream 
whether all downstream nodes are connected\n")); - printf(_(" --replication-lag replication lag in seconds (standbys only)\n")); - printf(_(" --role check node has expected role\n")); - printf(_(" --slots check for inactive replication slots\n")); + printf(_(" --archive-ready number of WAL files ready for archiving\n")); + printf(_(" --downstream whether all downstream nodes are connected\n")); + printf(_(" --replication-lag replication lag in seconds (standbys only)\n")); + printf(_(" --role check node has expected role\n")); + printf(_(" --slots check for inactive replication slots\n")); puts(""); @@ -2456,13 +2465,15 @@ do_node_help(void) puts(""); printf(_(" Configuration file required, runs on local node only.\n")); puts(""); - printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \ - " (including usability of \"pg_rewind\" if requested)\n")); - printf(_(" --force-rewind execute \"pg_rewind\" if necessary\n")); - printf(_(" --config-files comma-separated list of configuration files to retain\n" \ - " after executing \"pg_rewind\"\n")); - printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \ - " (default: /tmp)\n")); + printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \ + " (including usability of \"pg_rewind\" if requested)\n")); + printf(_(" --force-rewind[=VALUE] execute \"pg_rewind\" if necessary\n")); + printf(_(" (9.3 and 9.4 - provide full \"pg_rewind\" path)\n")); + + printf(_(" --config-files comma-separated list of configuration files to retain\n" \ + " after executing \"pg_rewind\"\n")); + printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \ + " (default: /tmp)\n")); puts(""); printf(_("NODE SERVICE\n")); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index e8b50290..34b53269 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2771,7 +2771,7 @@ 
do_standby_switchover(void) * archived */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { PQExpBufferData reason; PQExpBufferData msg; @@ -3578,7 +3578,7 @@ do_standby_switchover(void) * in pg_control, which is written at the first checkpoint, which might not * occur immediately. */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { log_notice(_("issuing CHECKPOINT")); checkpoint(local_conn); @@ -3595,7 +3595,7 @@ do_standby_switchover(void) KeyValueListCell *cell = NULL; bool first_entry = true; - if (runtime_options.force_rewind == false) + if (runtime_options.force_rewind_used == false) { log_error(_("new primary diverges from former primary and --force-rewind not provided")); log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\"")); @@ -3605,7 +3605,16 @@ do_standby_switchover(void) } appendPQExpBuffer(&node_rejoin_options, - " --force-rewind --config-files="); + " --force-rewind"); + + if (runtime_options.force_rewind_path[0] != '\0') + { + appendPQExpBuffer(&node_rejoin_options, + "=%s", + runtime_options.force_rewind_path); + } + appendPQExpBuffer(&node_rejoin_options, + " --config-files="); for (cell = remote_config_files.head; cell; cell = cell->next) { @@ -6315,7 +6324,9 @@ do_standby_help(void) printf(_(" --always-promote promote standby even if behind original primary\n")); printf(_(" --dry-run perform checks etc. 
but don't actually execute switchover\n")); printf(_(" -F, --force ignore warnings and continue anyway\n")); - printf(_(" --force-rewind 9.5 and later - use pg_rewind to reintegrate the old primary if necessary\n")); + printf(_(" --force-rewind[=VALUE] use \"pg_rewind\" to reintegrate the old primary if necessary\n")); + printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n")); + printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username); printf(_(" --siblings-follow have other standbys follow new primary\n")); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index f8629937..8814fff5 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -92,7 +92,8 @@ typedef struct /* "standby switchover" options */ bool always_promote; - bool force_rewind; + bool force_rewind_used; + char force_rewind_path[MAXPGPATH]; bool siblings_follow; /* "node status" options */ @@ -152,7 +153,7 @@ typedef struct /* "standby register" options */ \ false, 0, DEFAULT_WAIT_START, \ /* "standby switchover" options */ \ - false, false, false, \ + false, false, "", false, \ /* "node status" options */ \ false, \ /* "node check" options */ \ @@ -231,5 +232,6 @@ extern void get_server_action(t_server_action action, char *script, char *data_d extern bool data_dir_required_for_action(t_server_action action); extern void get_node_data_directory(char *data_dir_buf); extern void init_node_record(t_node_info *node_record); +extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); #endif /* _REPMGR_CLIENT_GLOBAL_H_ */ diff --git a/repmgr-client.c b/repmgr-client.c index 42e499ad..aaff7c0b 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -53,6 +53,7 @@ #include "repmgr.h" #include "compat.h" +#include "controldata.h" #include "repmgr-client.h" #include "repmgr-client-global.h" #include "repmgr-action-primary.h" @@ -421,7 +422,13 @@ main(int argc, char **argv) 
break; case OPT_FORCE_REWIND: - runtime_options.force_rewind = true; + runtime_options.force_rewind_used = true; + + if (optarg != NULL) + { + strncpy(runtime_options.force_rewind_path, optarg, MAXPGPATH - 1); /* last byte of the global buffer is never written, so the path stays NUL-terminated */ + } + + break; case OPT_SIBLINGS_FOLLOW: @@ -1605,7 +1612,7 @@ check_cli_parameters(const int action) } } - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { switch (action) { @@ -2749,3 +2756,90 @@ init_node_record(t_node_info *node_record) create_slot_name(node_record->slot_name, config_file_options.node_id); } } + + +/* + * can_use_pg_rewind() + * + * Check whether the prerequisites for running "pg_rewind" are met: + * "full_page_writes" must be "on", and either "wal_log_hints" must be "on" + * (not available in 9.3) or data checksums must be enabled. + * + * Returns true if "pg_rewind" can be used; otherwise returns false with a + * "; "-separated list of the unmet prerequisites appended to "reason". + */ +bool +can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason) +{ + bool can_use = true; + int server_version_num = get_server_version(conn, NULL); + + /* "full_page_writes" must be on in any case, including 9.3 */ + if (guc_set(conn, "full_page_writes", "=", "off")) + { + appendPQExpBuffer(reason, + _("\"full_page_writes\" must be set to \"on\"")); + + can_use = false; + } + + /* wal_log_hints not available in 9.3, so just determine if data checksums enabled */ + if (server_version_num < 90400) + { + int data_checksum_version = get_data_checksum_version(data_directory); + + if (data_checksum_version < 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer(reason, + _("unable to determine data checksum version")); + can_use = false; + } + else if (data_checksum_version == 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer(reason, + _("this cluster was initialised without data checksums")); + can_use = false; + } + + return can_use; + } + + /* + * "wal_log_hints" off - are data checksums available? Note: we're + * checking the local pg_control file here as the value will be the same + * throughout the cluster and saves a round-trip to the demotion + * candidate. 
+ */ + if (guc_set(conn, "wal_log_hints", "=", "on") == false) + { + int data_checksum_version = get_data_checksum_version(data_directory); + + if (data_checksum_version < 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer(reason, + _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version")); + can_use = false; + } + else if (data_checksum_version == 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer(reason, + _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled")); + + can_use = false; + } + } + + return can_use; +} diff --git a/repmgr-client.h b/repmgr-client.h index 8ec68c69..b3ad5518 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -168,7 +168,7 @@ static struct option long_options[] = /* "node rejoin" options */ {"config-files", required_argument, NULL, OPT_CONFIG_FILES}, {"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR}, - {"force-rewind", no_argument, NULL, OPT_FORCE_REWIND}, + {"force-rewind", optional_argument, NULL, OPT_FORCE_REWIND}, /* "node service" options */ {"action", required_argument, NULL, OPT_ACTION},