mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Add sanity checks to be sure pg_rewind can be used before executing a switchover
This commit is contained in:
31
dbutils.c
31
dbutils.c
@@ -26,6 +26,8 @@
|
||||
#include "strutil.h"
|
||||
#include "log.h"
|
||||
|
||||
#include "catalog/pg_control.h"
|
||||
|
||||
char repmgr_schema[MAXLEN] = "";
|
||||
char repmgr_schema_quoted[MAXLEN] = "";
|
||||
|
||||
@@ -1709,3 +1711,32 @@ parse_node_type(const char *type)
|
||||
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_data_checksum_version(const char *data_directory)
|
||||
{
|
||||
ControlFileData control_file;
|
||||
int fd;
|
||||
char control_file_path[MAXPGPATH];
|
||||
|
||||
snprintf(control_file_path, MAXPGPATH, "%s/global/pg_control", data_directory);
|
||||
if ((fd = open(control_file_path, O_RDONLY | PG_BINARY, 0)) == -1)
|
||||
{
|
||||
log_err(_("Unable to open control file \"%s\" for reading: %s\n"),
|
||||
control_file_path, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (read(fd, &control_file, sizeof(ControlFileData)) != sizeof(ControlFileData))
|
||||
{
|
||||
log_err(_("could not read file \"%s\": %s\n"),
|
||||
control_file_path, strerror(errno));
|
||||
close(fd);
|
||||
return -1;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
return (int)control_file.data_checksum_version;
|
||||
}
|
||||
|
||||
@@ -129,6 +129,7 @@ bool update_node_record_status(PGconn *conn, char *cluster_name, int this
|
||||
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
|
||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||
|
||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||
t_server_type parse_node_type(const char *type);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
#endif
|
||||
|
||||
56
repmgr.c
56
repmgr.c
@@ -337,7 +337,6 @@ main(int argc, char **argv)
|
||||
appendPQExpBuffer(&invalid_log_level, _("Invalid log level \"%s\" provided"), optarg);
|
||||
error_list_append(&cli_errors, invalid_log_level.data);
|
||||
termPQExpBuffer(&invalid_log_level);
|
||||
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -2514,6 +2513,7 @@ do_standby_switchover(void)
|
||||
}
|
||||
|
||||
log_debug("remote node name is \"%s\"\n", remote_node_record.name);
|
||||
|
||||
/*
|
||||
* Check that we can connect by SSH to the remote (current primary) server,
|
||||
* and read its data directory
|
||||
@@ -2523,7 +2523,6 @@ do_standby_switchover(void)
|
||||
* we should only be able to see the file as the PostgreSQL
|
||||
* user, so it should be readable anyway
|
||||
*/
|
||||
|
||||
get_conninfo_value(remote_conninfo, "host", remote_host);
|
||||
|
||||
r = test_ssh_connection(remote_host, runtime_options.remote_user);
|
||||
@@ -2612,14 +2611,17 @@ do_standby_switchover(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* check pg_rewind actually exists on remote */
|
||||
/* Sanity checks so we're sure pg_rewind can be used */
|
||||
if (use_pg_rewind == true)
|
||||
{
|
||||
bool wal_log_hints = false;
|
||||
|
||||
/* check pg_rewind actually exists on remote */
|
||||
|
||||
maxlen_snprintf(command,
|
||||
"ls -1 %s >/dev/null 2>&1 && echo 1 || echo 0",
|
||||
remote_pg_rewind);
|
||||
|
||||
log_notice("%s",command);
|
||||
initPQExpBuffer(&command_output);
|
||||
|
||||
(void)remote_command(
|
||||
@@ -2634,6 +2636,52 @@ do_standby_switchover(void)
|
||||
log_err(_("expected location is: %s\n"), remote_pg_rewind);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* check that server is appropriately configured */
|
||||
|
||||
/*
|
||||
* "full_page_writes" must be enabled in any case
|
||||
*/
|
||||
|
||||
if (guc_set(remote_conn, "full_page_writes", "=", "off"))
|
||||
{
|
||||
log_err(_("\"full_page_writes\" must be set to \"on\""));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether wal_log_hints is on - if so we're fine and don't need
|
||||
* to check for checksums
|
||||
*/
|
||||
|
||||
wal_log_hints = guc_set(remote_conn, "wal_log_hints", "=", "on");
|
||||
|
||||
if (wal_log_hints == false)
|
||||
{
|
||||
char local_data_directory[MAXLEN];
|
||||
int data_checksum_version;
|
||||
|
||||
/*
|
||||
* check the *local* server's control data for the data checksum
|
||||
* version - much easier than doing it on the remote server
|
||||
*/
|
||||
|
||||
if (get_pg_setting(local_conn, "data_directory", local_data_directory) == false)
|
||||
{
|
||||
log_err(_("unable to retrieve standby's data directory location\n"));
|
||||
PQfinish(remote_conn);
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_DB_CON);
|
||||
}
|
||||
|
||||
data_checksum_version = get_data_checksum_version(local_data_directory);
|
||||
|
||||
if (data_checksum_version == 0)
|
||||
{
|
||||
log_err(_("pg_rewind cannot be used - data checksums are not enabled for this cluster and \"wal_log_hints\" is \"off\"\n"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PQfinish(local_conn);
|
||||
|
||||
Reference in New Issue
Block a user