From 6a1797cadd99703d6cfac8bd4d42c0a7143661d7 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 30 Mar 2018 16:04:00 +0900 Subject: [PATCH] Enable pg_rewind to be used with PostgreSQL 9.3/9.4 pg_rewind is not part of the core distribution for those, but we provided support in repmgr 3.3 so should extend it to repmgr 4. Note that there is no check in place whether the pg_rewind binary exists, so it's up to the user to ensure it's present. Addresses GitHub #413. --- dbutils.c | 61 --------------------- dbutils.h | 1 - doc/repmgr-standby-switchover.sgml | 7 ++- doc/switchover.sgml | 40 ++++++++++++++ repmgr-action-node.c | 51 +++++++++++------- repmgr-action-standby.c | 21 ++++++-- repmgr-client-global.h | 6 ++- repmgr-client.c | 85 +++++++++++++++++++++++++++++- repmgr-client.h | 2 +- 9 files changed, 180 insertions(+), 94 deletions(-) diff --git a/dbutils.c b/dbutils.c index 6b564959..d576bed6 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1342,67 +1342,6 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info) } -bool -can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason) -{ - bool can_use = true; - - if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) - server_version_num = get_server_version(conn, NULL); - - if (server_version_num < 90500) - { - appendPQExpBuffer(reason, - _("pg_rewind available from PostgreSQL 9.5")); - return false; - } - - if (guc_set(conn, "full_page_writes", "=", "off")) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"full_page_writes\" must be set to \"on\"")); - - can_use = false; - } - - /* - * "wal_log_hints" off - are data checksums available? Note: we're - * checking the local pg_control file here as the value will be the same - * throughout the cluster and saves a round-trip to the demotion - * candidate. 
- */ - if (guc_set(conn, "wal_log_hints", "=", "on") == false) - { - int data_checksum_version = get_data_checksum_version(data_directory); - - if (data_checksum_version < 0) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version")); - can_use = false; - } - else if (data_checksum_version == 0) - { - if (can_use == false) - appendPQExpBuffer(reason, "; "); - - appendPQExpBuffer(reason, - _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled")); - - can_use = false; - } - } - - return can_use; -} - - int get_ready_archive_files(PGconn *conn, const char *data_directory) { diff --git a/dbutils.h b/dbutils.h index 2055637c..93fbf019 100644 --- a/dbutils.h +++ b/dbutils.h @@ -387,7 +387,6 @@ bool get_cluster_size(PGconn *conn, char *size); int get_server_version(PGconn *conn, char *server_version); RecoveryType get_recovery_type(PGconn *conn); int get_primary_node_id(PGconn *conn); -bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); int get_ready_archive_files(PGconn *conn, const char *data_directory); bool identify_system(PGconn *repl_conn, t_system_identification *identification); bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id); diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml index bec28cce..3e1fcf41 100644 --- a/doc/repmgr-standby-switchover.sgml +++ b/doc/repmgr-standby-switchover.sgml @@ -84,11 +84,14 @@ - + Use pg_rewind to reintegrate the old primary if necessary - (PostgreSQL 9.5 and later). + (and the prerequisites for using pg_rewind are met). + If using PostgreSQL 9.3 or 9.4, and the pg_rewind + binary is not installed in the PostgreSQL bin directory, + provide its full path. For more details see also . 
diff --git a/doc/switchover.sgml b/doc/switchover.sgml index f34c5416..74109e13 100644 --- a/doc/switchover.sgml +++ b/doc/switchover.sgml @@ -179,6 +179,46 @@ + + Switchover and pg_rewind + + If the demotion candidate does not shut down smoothly or cleanly, there's a risk it + will have a slightly divergent timeline and will not be able to attach to the new + primary. To fix this situation without needing to reclone the old primary, it's + possible to use the pg_rewind utility, which will usually be + able to resync the two servers. + + + To have &repmgr; execute pg_rewind if it detects this + situation after promoting the new primary, add the + option. + + + + If &repmgr; detects a situation where it needs to execute pg_rewind, + it will execute a CHECKPOINT on the new primary before executing + pg_rewind. + + + + For more details on pg_rewind, see: + https://www.postgresql.org/docs/current/static/app-pgrewind.html. + + + pg_rewind has been part of the core PostgreSQL distribution since + version 9.5. Users of versions 9.3 and 9.4 will need to manually install; source code available here: + https://github.com/vmware/pg_rewind. + If the pg_rewind + binary is not installed in the PostgreSQL bin directory, provide + its full path on the demotion candidate with . + + + Note that building the 9.3/9.4 version of pg_rewind requires the PostgreSQL + source code. Also, PostgreSQL 9.3 does not provide wal_log_hints, + meaning data checksums must have been enabled when the database was initialized. 
+ + + diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 16b50e37..1defef5e 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -1728,7 +1728,7 @@ do_node_rejoin(void) { log_error(_("database is not shut down cleanly")); - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { log_detail(_("pg_rewind will not be able to run")); } @@ -1779,7 +1779,7 @@ do_node_rejoin(void) * archived */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { PQExpBufferData reason; PQExpBufferData msg; @@ -1816,7 +1816,7 @@ do_node_rejoin(void) * Forcibly rewind node if requested (this is mainly for use when this * action is being executed by "repmgr standby switchover") */ - if (runtime_options.force_rewind == true && runtime_options.dry_run == false) + if (runtime_options.force_rewind_used == true && runtime_options.dry_run == false) { int ret; PQExpBufferData filebuf; @@ -1826,9 +1826,18 @@ do_node_rejoin(void) /* execute pg_rewind */ initPQExpBuffer(&command); - appendPQExpBuffer(&command, - "%s -D ", - make_pg_path("pg_rewind")); + if (runtime_options.force_rewind_path[0] != '\0') + { + appendPQExpBuffer(&command, + "%s -D ", + runtime_options.force_rewind_path); + } + else + { + appendPQExpBuffer(&command, + "%s -D ", + make_pg_path("pg_rewind")); + } appendShellString(&command, config_file_options.data_directory); @@ -2438,15 +2447,15 @@ do_node_help(void) puts(""); printf(_(" Configuration file required, runs on local node only.\n")); puts(""); - printf(_(" --csv emit output as CSV\n")); - printf(_(" --nagios emit output in Nagios format (individual status output only)\n")); + printf(_(" --csv emit output as CSV\n")); + printf(_(" --nagios emit output in Nagios format (individual status output only)\n")); puts(""); printf(_(" Following options check an individual status:\n")); - printf(_(" --archive-ready number of WAL files ready for archiving\n")); - printf(_(" --downstream 
whether all downstream nodes are connected\n")); - printf(_(" --replication-lag replication lag in seconds (standbys only)\n")); - printf(_(" --role check node has expected role\n")); - printf(_(" --slots check for inactive replication slots\n")); + printf(_(" --archive-ready number of WAL files ready for archiving\n")); + printf(_(" --downstream whether all downstream nodes are connected\n")); + printf(_(" --replication-lag replication lag in seconds (standbys only)\n")); + printf(_(" --role check node has expected role\n")); + printf(_(" --slots check for inactive replication slots\n")); puts(""); @@ -2456,13 +2465,15 @@ do_node_help(void) puts(""); printf(_(" Configuration file required, runs on local node only.\n")); puts(""); - printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \ - " (including usability of \"pg_rewind\" if requested)\n")); - printf(_(" --force-rewind execute \"pg_rewind\" if necessary\n")); - printf(_(" --config-files comma-separated list of configuration files to retain\n" \ - " after executing \"pg_rewind\"\n")); - printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \ - " (default: /tmp)\n")); + printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \ + " (including usability of \"pg_rewind\" if requested)\n")); + printf(_(" --force-rewind[=VALUE] execute \"pg_rewind\" if necessary\n")); + printf(_(" (9.3 and 9.4 - provide full \"pg_rewind\" path)\n")); + + printf(_(" --config-files comma-separated list of configuration files to retain\n" \ + " after executing \"pg_rewind\"\n")); + printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \ + " (default: /tmp)\n")); puts(""); printf(_("NODE SERVICE\n")); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index e8b50290..34b53269 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2771,7 +2771,7 @@ 
do_standby_switchover(void) * archived */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { PQExpBufferData reason; PQExpBufferData msg; @@ -3578,7 +3578,7 @@ do_standby_switchover(void) * in pg_control, which is written at the first checkpoint, which might not * occur immediately. */ - if (runtime_options.force_rewind == true) + if (runtime_options.force_rewind_used == true) { log_notice(_("issuing CHECKPOINT")); checkpoint(local_conn); @@ -3595,7 +3595,7 @@ do_standby_switchover(void) KeyValueListCell *cell = NULL; bool first_entry = true; - if (runtime_options.force_rewind == false) + if (runtime_options.force_rewind_used == false) { log_error(_("new primary diverges from former primary and --force-rewind not provided")); log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\"")); @@ -3605,7 +3605,16 @@ do_standby_switchover(void) } appendPQExpBuffer(&node_rejoin_options, - " --force-rewind --config-files="); + " --force-rewind"); + + if (runtime_options.force_rewind_path[0] != '\0') + { + appendPQExpBuffer(&node_rejoin_options, + "=%s", + runtime_options.force_rewind_path); + } + appendPQExpBuffer(&node_rejoin_options, + " --config-files="); for (cell = remote_config_files.head; cell; cell = cell->next) { @@ -6315,7 +6324,9 @@ do_standby_help(void) printf(_(" --always-promote promote standby even if behind original primary\n")); printf(_(" --dry-run perform checks etc. 
but don't actually execute switchover\n")); printf(_(" -F, --force ignore warnings and continue anyway\n")); - printf(_(" --force-rewind 9.5 and later - use pg_rewind to reintegrate the old primary if necessary\n")); + printf(_(" --force-rewind[=VALUE] use \"pg_rewind\" to reintegrate the old primary if necessary\n")); + printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n")); + printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username); printf(_(" --siblings-follow have other standbys follow new primary\n")); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index f8629937..8814fff5 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -92,7 +92,8 @@ typedef struct /* "standby switchover" options */ bool always_promote; - bool force_rewind; + bool force_rewind_used; + char force_rewind_path[MAXPGPATH]; bool siblings_follow; /* "node status" options */ @@ -152,7 +153,7 @@ typedef struct /* "standby register" options */ \ false, 0, DEFAULT_WAIT_START, \ /* "standby switchover" options */ \ - false, false, false, \ + false, false, "", false, \ /* "node status" options */ \ false, \ /* "node check" options */ \ @@ -231,5 +232,6 @@ extern void get_server_action(t_server_action action, char *script, char *data_d extern bool data_dir_required_for_action(t_server_action action); extern void get_node_data_directory(char *data_dir_buf); extern void init_node_record(t_node_info *node_record); +extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); #endif /* _REPMGR_CLIENT_GLOBAL_H_ */ diff --git a/repmgr-client.c b/repmgr-client.c index 42e499ad..aaff7c0b 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -53,6 +53,7 @@ #include "repmgr.h" #include "compat.h" +#include "controldata.h" #include "repmgr-client.h" #include "repmgr-client-global.h" #include "repmgr-action-primary.h" @@ -421,7 +422,13 @@ main(int argc, char **argv) 
break;
 
 			case OPT_FORCE_REWIND:
-				runtime_options.force_rewind = true;
+				runtime_options.force_rewind_used = true;
+				/* optional value: path to the "pg_rewind" binary (bounded copy, always terminated) */
+				if (optarg != NULL)
+				{
+					snprintf(runtime_options.force_rewind_path, MAXPGPATH, "%s", optarg);
+				}
+
 				break;
 
 			case OPT_SIBLINGS_FOLLOW:
@@ -1605,7 +1612,7 @@ check_cli_parameters(const int action)
 		}
 	}
 
-	if (runtime_options.force_rewind == true)
+	if (runtime_options.force_rewind_used == true)
 	{
 		switch (action)
 		{
@@ -2749,3 +2756,77 @@ init_node_record(t_node_info *node_record)
 		create_slot_name(node_record->slot_name, config_file_options.node_id);
 	}
 }
+
+
+/*
+ * Returns true if pg_rewind's prerequisites are met, else explains in "reason".
+ */
+bool
+can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
+{
+	bool		can_use = true;
+	int			server_version_num = get_server_version(conn, NULL);
+
+	/* wal_log_hints not available in 9.3, so just determine if data checksums enabled */
+	if (server_version_num < 90400)
+	{
+		int			data_checksum_version = get_data_checksum_version(data_directory);
+
+		if (data_checksum_version < 0)
+		{
+			appendPQExpBuffer(reason,
+							  _("unable to determine data checksum version"));
+			can_use = false;
+		}
+		else if (data_checksum_version == 0)
+		{
+			appendPQExpBuffer(reason,
+							  _("this cluster was initialised without data checksums"));
+			can_use = false;
+		}
+
+		return can_use;
+	}
+
+	/* "full_page_writes" must be on; NOTE(review): not checked on the 9.3 path above - confirm */
+	if (guc_set(conn, "full_page_writes", "=", "off"))
+	{
+		appendPQExpBuffer(reason,
+						  _("\"full_page_writes\" must be set to \"on\""));
+
+		can_use = false;
+	}
+
+	/*
+	 * "wal_log_hints" off - are data checksums available? Note: we're
+	 * checking the local pg_control file here as the value will be the same
+	 * throughout the cluster and saves a round-trip to the demotion
+	 * candidate.
+	 */
+	if (guc_set(conn, "wal_log_hints", "=", "on") == false)
+	{
+		int			data_checksum_version = get_data_checksum_version(data_directory);
+
+		if (data_checksum_version < 0)
+		{
+			if (can_use == false)
+				appendPQExpBuffer(reason, "; ");
+
+			appendPQExpBuffer(reason,
+							  _("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version"));
+			can_use = false;
+		}
+		else if (data_checksum_version == 0)
+		{
+			if (can_use == false)
+				appendPQExpBuffer(reason, "; ");
+
+			appendPQExpBuffer(reason,
+							  _("\"wal_log_hints\" is set to \"off\" and data checksums are disabled"));
+
+			can_use = false;
+		}
+	}
+
+	return can_use;
+}
diff --git a/repmgr-client.h b/repmgr-client.h
index 8ec68c69..b3ad5518 100644
--- a/repmgr-client.h
+++ b/repmgr-client.h
@@ -168,7 +168,7 @@ static struct option long_options[] =
 	/* "node rejoin" options */
 	{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
 	{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
-	{"force-rewind", no_argument, NULL, OPT_FORCE_REWIND},
+	{"force-rewind", optional_argument, NULL, OPT_FORCE_REWIND},
 
 	/* "node service" options */
 	{"action", required_argument, NULL, OPT_ACTION},