From ca6cbcf9655247d1318b3be6428f94d92fedbcaf Mon Sep 17 00:00:00 2001
From: Ian Barwick
Date: Thu, 28 Jan 2016 07:50:39 +0900
Subject: [PATCH] Add sanity checks to be sure pg_rewind can be used before
 executing a switchover

---
 dbutils.c | 40 ++++++++++++++++++++++++++++++++++++
 dbutils.h |  3 ++-
 repmgr.c  | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 107 insertions(+), 5 deletions(-)

diff --git a/dbutils.c b/dbutils.c
index 0f9b5bff..0b12517d 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -26,6 +26,8 @@
 #include "strutil.h"
 #include "log.h"
 
+#include "catalog/pg_control.h"
+
 char repmgr_schema[MAXLEN] = "";
 char repmgr_schema_quoted[MAXLEN] = "";
 
@@ -1709,3 +1711,41 @@ parse_node_type(const char *type)
 
 	return UNKNOWN;
 }
+
+
+/*
+ * get_data_checksum_version()
+ *
+ * Read "global/pg_control" below the given data directory and return the
+ * data checksum version stored there (0: data checksums not enabled).
+ *
+ * Returns -1, after logging the reason, if the control file cannot be
+ * opened or read in full; callers must check for this.
+ */
+int
+get_data_checksum_version(const char *data_directory)
+{
+	ControlFileData control_file;
+	int			fd;
+	char		control_file_path[MAXPGPATH];
+
+	snprintf(control_file_path, MAXPGPATH, "%s/global/pg_control", data_directory);
+	if ((fd = open(control_file_path, O_RDONLY | PG_BINARY, 0)) == -1)
+	{
+		log_err(_("Unable to open control file \"%s\" for reading: %s\n"),
+				control_file_path, strerror(errno));
+		return -1;
+	}
+
+	if (read(fd, &control_file, sizeof(ControlFileData)) != sizeof(ControlFileData))
+	{
+		log_err(_("could not read file \"%s\": %s\n"),
+				control_file_path, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+
+	return (int)control_file.data_checksum_version;
+}
diff --git a/dbutils.h b/dbutils.h
index ddc2906d..df9f1065 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -129,6 +129,7 @@ bool		update_node_record_status(PGconn *conn, char *cluster_name, int this
 bool		update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
 bool		create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
-int     get_node_replication_state(PGconn *conn, char *node_name, char *output);
+int			get_node_replication_state(PGconn *conn, char *node_name, char *output);
 t_server_type parse_node_type(const char *type);
+int			get_data_checksum_version(const char *data_directory);
 
 #endif
diff --git a/repmgr.c b/repmgr.c
index 37b10cf3..6e621a93 100644
--- a/repmgr.c
+++ b/repmgr.c
@@ -337,7 +337,6 @@ main(int argc, char **argv)
 						appendPQExpBuffer(&invalid_log_level, _("Invalid log level \"%s\" provided"), optarg);
 						error_list_append(&cli_errors, invalid_log_level.data);
 						termPQExpBuffer(&invalid_log_level);
-
 					}
 					break;
 				}
@@ -2514,6 +2513,7 @@ do_standby_switchover(void)
 	}
 
 	log_debug("remote node name is \"%s\"\n", remote_node_record.name);
+
 	/*
 	 * Check that we can connect by SSH to the remote (current primary) server,
 	 * and read its data directory
@@ -2523,7 +2523,6 @@ do_standby_switchover(void)
 	 * we should only be able to see the file as the PostgreSQL
 	 * user, so it should be readable anyway
 	 */
-
 	get_conninfo_value(remote_conninfo, "host", remote_host);
 
 	r = test_ssh_connection(remote_host, runtime_options.remote_user);
@@ -2612,14 +2611,17 @@ do_standby_switchover(void)
 		}
 	}
 
-	/* check pg_rewind actually exists on remote */
+	/* Sanity checks so we're sure pg_rewind can be used */
 	if (use_pg_rewind == true)
 	{
+		bool		wal_log_hints = false;
+
+		/* check pg_rewind actually exists on remote */
+
 		maxlen_snprintf(command,
 						"ls -1 %s >/dev/null 2>&1 && echo 1 || echo 0",
 						remote_pg_rewind);
-		log_notice("%s",command);
 
 		initPQExpBuffer(&command_output);
 
 		(void)remote_command(
@@ -2634,6 +2636,65 @@ do_standby_switchover(void)
 			log_err(_("expected location is: %s\n"), remote_pg_rewind);
 			exit(ERR_BAD_CONFIG);
 		}
+
+		/* check that server is appropriately configured */
+
+		/*
+		 * "full_page_writes" must be enabled in any case
+		 */
+
+		if (guc_set(remote_conn, "full_page_writes", "=", "off"))
+		{
+			log_err(_("\"full_page_writes\" must be set to \"on\"\n"));
+			PQfinish(remote_conn);
+			PQfinish(local_conn);
+			exit(ERR_BAD_CONFIG);
+		}
+
+		/*
+		 * Check whether wal_log_hints is on - if so we're fine and don't need
+		 * to check for checksums
+		 */
+
+		wal_log_hints = guc_set(remote_conn, "wal_log_hints", "=", "on");
+
+		if (wal_log_hints == false)
+		{
+			char		local_data_directory[MAXLEN];
+			int			data_checksum_version;
+
+			/*
+			 * check the *local* server's control data for the data checksum
+			 * version - much easier than doing it on the remote server
+			 */
+
+			if (get_pg_setting(local_conn, "data_directory", local_data_directory) == false)
+			{
+				log_err(_("unable to retrieve standby's data directory location\n"));
+				PQfinish(remote_conn);
+				PQfinish(local_conn);
+				exit(ERR_DB_CON);
+			}
+
+			data_checksum_version = get_data_checksum_version(local_data_directory);
+
+			/* -1 means the control file could not be read (already logged) */
+			if (data_checksum_version < 0)
+			{
+				log_err(_("unable to determine whether data checksums are enabled\n"));
+				PQfinish(remote_conn);
+				PQfinish(local_conn);
+				exit(ERR_BAD_CONFIG);
+			}
+
+			if (data_checksum_version == 0)
+			{
+				log_err(_("pg_rewind cannot be used - data checksums are not enabled for this cluster and \"wal_log_hints\" is \"off\"\n"));
+				PQfinish(remote_conn);
+				PQfinish(local_conn);
+				exit(ERR_BAD_CONFIG);
+			}
+		}
 	}
 
 	PQfinish(local_conn);