Add sanity checks to be sure pg_rewind can be used before executing a switchover

This commit is contained in:
Ian Barwick
2016-01-28 07:50:39 +09:00
committed by Ian Barwick
parent 16c1e13019
commit ca6cbcf965
3 changed files with 85 additions and 5 deletions

View File

@@ -26,6 +26,8 @@
#include "strutil.h"
#include "log.h"
#include "catalog/pg_control.h"
char repmgr_schema[MAXLEN] = "";
char repmgr_schema_quoted[MAXLEN] = "";
@@ -1709,3 +1711,32 @@ parse_node_type(const char *type)
return UNKNOWN;
}
/*
 * get_data_checksum_version()
 *
 * Read the control file "global/pg_control" located beneath
 * "data_directory" and return the value of its "data_checksum_version"
 * field.
 *
 * Returns -1 if the control file path does not fit into MAXPGPATH, or if
 * the file cannot be opened or read in full; an error is logged in each
 * case. Callers must check for a negative return value before interpreting
 * the result as a checksum version.
 */
int
get_data_checksum_version(const char *data_directory)
{
	ControlFileData control_file;
	int			fd;
	int			path_len;
	ssize_t		bytes_read;
	char		control_file_path[MAXPGPATH];

	path_len = snprintf(control_file_path, MAXPGPATH,
						"%s/global/pg_control", data_directory);

	/* Refuse to work with a silently truncated path */
	if (path_len < 0 || path_len >= MAXPGPATH)
	{
		log_err(_("control file path for data directory \"%s\" is too long\n"),
				data_directory);
		return -1;
	}

	if ((fd = open(control_file_path, O_RDONLY | PG_BINARY, 0)) == -1)
	{
		log_err(_("Unable to open control file \"%s\" for reading: %s\n"),
				control_file_path, strerror(errno));
		return -1;
	}

	bytes_read = read(fd, &control_file, sizeof(ControlFileData));

	if (bytes_read != (ssize_t) sizeof(ControlFileData))
	{
		if (bytes_read < 0)
		{
			/* genuine I/O error: errno is meaningful */
			log_err(_("could not read file \"%s\": %s\n"),
					control_file_path, strerror(errno));
		}
		else
		{
			/*
			 * Short read (e.g. truncated file): read() does not set errno
			 * here, so report the byte counts rather than a stale errno.
			 */
			log_err(_("could not read file \"%s\": read %d of %d bytes\n"),
					control_file_path,
					(int) bytes_read,
					(int) sizeof(ControlFileData));
		}

		close(fd);
		return -1;
	}

	close(fd);

	return (int) control_file.data_checksum_version;
}

View File

@@ -129,6 +129,7 @@ bool update_node_record_status(PGconn *conn, char *cluster_name, int this
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
t_server_type parse_node_type(const char *type);
int get_data_checksum_version(const char *data_directory);
#endif

View File

@@ -337,7 +337,6 @@ main(int argc, char **argv)
appendPQExpBuffer(&invalid_log_level, _("Invalid log level \"%s\" provided"), optarg);
error_list_append(&cli_errors, invalid_log_level.data);
termPQExpBuffer(&invalid_log_level);
}
break;
}
@@ -2514,6 +2513,7 @@ do_standby_switchover(void)
}
log_debug("remote node name is \"%s\"\n", remote_node_record.name);
/*
* Check that we can connect by SSH to the remote (current primary) server,
* and read its data directory
@@ -2523,7 +2523,6 @@ do_standby_switchover(void)
* we should only be able to see the file as the PostgreSQL
* user, so it should be readable anyway
*/
get_conninfo_value(remote_conninfo, "host", remote_host);
r = test_ssh_connection(remote_host, runtime_options.remote_user);
@@ -2612,14 +2611,17 @@ do_standby_switchover(void)
}
}
/* check pg_rewind actually exists on remote */
/* Sanity checks so we're sure pg_rewind can be used */
if (use_pg_rewind == true)
{
bool wal_log_hints = false;
/* check pg_rewind actually exists on remote */
maxlen_snprintf(command,
"ls -1 %s >/dev/null 2>&1 && echo 1 || echo 0",
remote_pg_rewind);
log_notice("%s",command);
initPQExpBuffer(&command_output);
(void)remote_command(
@@ -2634,6 +2636,52 @@ do_standby_switchover(void)
log_err(_("expected location is: %s\n"), remote_pg_rewind);
exit(ERR_BAD_CONFIG);
}
/* check that server is appropriately configured */
/*
* "full_page_writes" must be enabled in any case
*/
if (guc_set(remote_conn, "full_page_writes", "=", "off"))
{
log_err(_("\"full_page_writes\" must be set to \"on\""));
exit(ERR_BAD_CONFIG);
}
/*
* Check whether wal_log_hints is on - if so we're fine and don't need
* to check for checksums
*/
wal_log_hints = guc_set(remote_conn, "wal_log_hints", "=", "on");
if (wal_log_hints == false)
{
char local_data_directory[MAXLEN];
int data_checksum_version;
/*
* check the *local* server's control data for the data checksum
* version - much easier than doing it on the remote server
*/
if (get_pg_setting(local_conn, "data_directory", local_data_directory) == false)
{
log_err(_("unable to retrieve standby's data directory location\n"));
PQfinish(remote_conn);
PQfinish(local_conn);
exit(ERR_DB_CON);
}
data_checksum_version = get_data_checksum_version(local_data_directory);
if (data_checksum_version == 0)
{
log_err(_("pg_rewind cannot be used - data checksums are not enabled for this cluster and \"wal_log_hints\" is \"off\"\n"));
exit(ERR_BAD_CONFIG);
}
}
}
PQfinish(local_conn);