From 5948cf6cdad2650cebd266307bcc54cae59fad2c Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Fri, 4 Aug 2017 00:38:07 +0900 Subject: [PATCH] repmgr standby switchover: add sanity check for pg_rewind useability pg_rewind will only be executed on a demoted primary if explictly requested, to prevent transactions on the primary, which were never replicated, from being automatically overwritten. If --force-rewind is provided, we'll need to check pg_rewind is actually useable before we need to use it. --- Makefile.in | 6 ++--- controldata.c | 34 ++++++++++++++++++++---- controldata.h | 11 +++----- dbutils.c | 58 +++++++++++++++++++++++++++++++++++++++-- dbutils.h | 1 + repmgr-action-standby.c | 28 +++++++++++++++++++- repmgr-client-global.h | 3 ++- strutil.c | 2 -- 8 files changed, 122 insertions(+), 21 deletions(-) diff --git a/Makefile.in b/Makefile.in index 8b2a6b61..88759bf2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -29,9 +29,9 @@ include Makefile.global $(info Building against PostgreSQL $(MAJORVERSION)) REPMGR_CLIENT_OBJS = repmgr-client.o \ - repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \ - configfile.o log.o strutil.o dbutils.o dirutil.o compat.o controldata.o -REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o + repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \ + configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o +REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o $(REPMGR_CLIENT_OBJS): repmgr-client.h diff --git a/controldata.c b/controldata.c index 7c37e0e1..b6035b31 100644 --- a/controldata.c +++ b/controldata.c @@ -15,10 +15,10 @@ #include "repmgr.h" #include "controldata.h" -static ControlFileInfo *get_controlfile(char *DataDir); +static ControlFileInfo *get_controlfile(const char *DataDir); DBState -get_db_state(char *data_directory) +get_db_state(const char *data_directory) { ControlFileInfo *control_file_info; DBState state; @@ -37,8 +37,8 @@ get_db_state(char *data_directory) } -XLogRecPtr -get_latest_checkpoint_location(char *data_directory) +extern XLogRecPtr +get_latest_checkpoint_location(const char *data_directory) { ControlFileInfo *control_file_info; XLogRecPtr checkPoint; @@ -57,6 +57,30 @@ get_latest_checkpoint_location(char *data_directory) } +int +get_data_checksum_version(const char *data_directory) +{ + ControlFileInfo *control_file_info; + int data_checksum_version; + + control_file_info = get_controlfile(data_directory); + + if (control_file_info->control_file_processed == false) + { + data_checksum_version = -1; + } + else + { + data_checksum_version = (int)control_file_info->control_file->data_checksum_version; + } + + pfree(control_file_info->control_file); + pfree(control_file_info); + + return data_checksum_version; +} + + const char * describe_db_state(DBState state) { @@ -86,7 +110,7 @@ describe_db_state(DBState state) * compatibility, and also don't care if the file isn't readable. */ static ControlFileInfo * -get_controlfile(char *DataDir) +get_controlfile(const char *DataDir) { ControlFileInfo *control_file_info; int fd; diff --git a/controldata.h b/controldata.h index 655b283e..9ddf720a 100644 --- a/controldata.h +++ b/controldata.h @@ -18,13 +18,10 @@ typedef struct ControlFileData *control_file; } ControlFileInfo; -extern DBState -get_db_state(char *data_directory); +DBState get_db_state(const char *data_directory); +const char * describe_db_state(DBState state); +int get_data_checksum_version(const char *data_directory); -extern const char * -describe_db_state(DBState state); - -extern XLogRecPtr -get_latest_checkpoint_location(char *data_directory); +extern XLogRecPtr get_latest_checkpoint_location(const char *data_directory); #endif /* _CONTROLDATA_H_ */ diff --git a/dbutils.c b/dbutils.c index 49d951f4..903837fa 100644 --- a/dbutils.c +++ b/dbutils.c @@ -11,9 +11,9 @@ #include "repmgr.h" #include "dbutils.h" +#include "controldata.h" -#include "catalog/pg_control.h" - +#include "dirutil.h" /* mainly for use by repmgrd */ int server_version_num = UNKNOWN_SERVER_VERSION_NUM; @@ -1239,6 +1239,60 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info) } +bool +can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason) +{ + bool can_use = true; + + if (guc_set(conn, "full_page_writes", "=", "off")) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer( + reason, + _("\"full_page_writes\" must be set to \"on\"")); + + can_use = false; + } + + /* + * "wal_log_hints" off - are data checksums available? + * Note: we're checking the local pg_control file here as the value will + * be the same throughout the cluster and saves a round-trip to the + * demotion candidate. + */ + if (guc_set(conn, "wal_log_hints", "=", "on") == false) + { + int data_checksum_version = get_data_checksum_version(data_directory); + + if (data_checksum_version < 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer( + reason, + _("\"wal_log_hints\" is set to \"off\" but unable to determine checksum version")); + can_use = false; + } + else if (data_checksum_version == 0) + { + if (can_use == false) + appendPQExpBuffer(reason, "; "); + + appendPQExpBuffer( + reason, + _("\"wal_log_hints\" is set to \"off\" and checksums are disabled")); + + can_use = false; + } + } + + return can_use; +} + + /* ================ */ /* result functions */ /* ================ */ diff --git a/dbutils.h b/dbutils.h index 795497e2..71554312 100644 --- a/dbutils.h +++ b/dbutils.h @@ -322,6 +322,7 @@ int get_server_version(PGconn *conn, char *server_version); RecoveryType get_recovery_type(PGconn *conn); int get_primary_node_id(PGconn *conn); bool get_replication_info(PGconn *conn, ReplInfo *replication_info); +bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); /* extension functions */ ExtensionStatus get_repmgr_extension_status(PGconn *conn); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 40d0a5ed..fd78edcc 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1563,7 +1563,8 @@ do_standby_follow(void) * we'll need the location of its configuration file; this * can be provided explicitly with -C/--remote-config-file, * otherwise repmgr will look in default locations on the - * remote server + * remote server (starting with the same path as the local + * configuration file) * * TODO: * - make connection test timeouts/intervals configurable (see below) @@ -1695,6 +1696,31 @@ do_standby_switchover(void) log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name); + + /* + * If --force-rewind specified, check pg_rewind can be used + */ + + if (runtime_options.force_rewind == true) + { + PQExpBufferData reason; + + initPQExpBuffer(&reason); + + if (can_use_pg_rewind(remote_conn, config_file_options.data_directory, &reason) == false) + { + log_error(_("--force-rewind specified but pg_rewind cannot be used")); + log_detail("%s", reason.data); + termPQExpBuffer(&reason); + PQfinish(local_conn); + PQfinish(remote_conn); + + exit(ERR_BAD_CONFIG); + } + + termPQExpBuffer(&reason); + } + PQfinish(local_conn); PQfinish(remote_conn); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 9ebe4ac5..e3c0533c 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -187,4 +187,5 @@ extern void make_remote_repmgr_path(PQExpBufferData *outputbuf); extern void get_server_action(t_server_action action, char *script, char *data_dir); extern bool data_dir_required_for_action(t_server_action action); extern void get_node_data_directory(char *data_dir_buf); -#endif + +#endif /* _REPMGR_CLIENT_GLOBAL_H_ */ diff --git a/strutil.c b/strutil.c index 6bfb6285..230d1d97 100644 --- a/strutil.c +++ b/strutil.c @@ -8,8 +8,6 @@ #include #include - - #include "log.h" #include "strutil.h"