mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgr standby switchover: add sanity check for pg_rewind useability
pg_rewind will only be executed on a demoted primary if explictly requested, to prevent transactions on the primary, which were never replicated, from being automatically overwritten. If --force-rewind is provided, we'll need to check pg_rewind is actually useable before we need to use it.
This commit is contained in:
@@ -29,9 +29,9 @@ include Makefile.global
|
||||
$(info Building against PostgreSQL $(MAJORVERSION))
|
||||
|
||||
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
|
||||
configfile.o log.o strutil.o dbutils.o dirutil.o compat.o controldata.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o
|
||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
|
||||
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
|
||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o
|
||||
|
||||
$(REPMGR_CLIENT_OBJS): repmgr-client.h
|
||||
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
#include "repmgr.h"
|
||||
#include "controldata.h"
|
||||
|
||||
static ControlFileInfo *get_controlfile(char *DataDir);
|
||||
static ControlFileInfo *get_controlfile(const char *DataDir);
|
||||
|
||||
DBState
|
||||
get_db_state(char *data_directory)
|
||||
get_db_state(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info;
|
||||
DBState state;
|
||||
@@ -37,8 +37,8 @@ get_db_state(char *data_directory)
|
||||
}
|
||||
|
||||
|
||||
XLogRecPtr
|
||||
get_latest_checkpoint_location(char *data_directory)
|
||||
extern XLogRecPtr
|
||||
get_latest_checkpoint_location(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info;
|
||||
XLogRecPtr checkPoint;
|
||||
@@ -57,6 +57,30 @@ get_latest_checkpoint_location(char *data_directory)
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_data_checksum_version(const char *data_directory)
|
||||
{
|
||||
ControlFileInfo *control_file_info;
|
||||
int data_checksum_version;
|
||||
|
||||
control_file_info = get_controlfile(data_directory);
|
||||
|
||||
if (control_file_info->control_file_processed == false)
|
||||
{
|
||||
data_checksum_version = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
data_checksum_version = (int)control_file_info->control_file->data_checksum_version;
|
||||
}
|
||||
|
||||
pfree(control_file_info->control_file);
|
||||
pfree(control_file_info);
|
||||
|
||||
return data_checksum_version;
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
describe_db_state(DBState state)
|
||||
{
|
||||
@@ -86,7 +110,7 @@ describe_db_state(DBState state)
|
||||
* compatibility, and also don't care if the file isn't readable.
|
||||
*/
|
||||
static ControlFileInfo *
|
||||
get_controlfile(char *DataDir)
|
||||
get_controlfile(const char *DataDir)
|
||||
{
|
||||
ControlFileInfo *control_file_info;
|
||||
int fd;
|
||||
|
||||
@@ -18,13 +18,10 @@ typedef struct
|
||||
ControlFileData *control_file;
|
||||
} ControlFileInfo;
|
||||
|
||||
extern DBState
|
||||
get_db_state(char *data_directory);
|
||||
DBState get_db_state(const char *data_directory);
|
||||
const char * describe_db_state(DBState state);
|
||||
int get_data_checksum_version(const char *data_directory);
|
||||
|
||||
extern const char *
|
||||
describe_db_state(DBState state);
|
||||
|
||||
extern XLogRecPtr
|
||||
get_latest_checkpoint_location(char *data_directory);
|
||||
extern XLogRecPtr get_latest_checkpoint_location(const char *data_directory);
|
||||
|
||||
#endif /* _CONTROLDATA_H_ */
|
||||
|
||||
58
dbutils.c
58
dbutils.c
@@ -11,9 +11,9 @@
|
||||
|
||||
#include "repmgr.h"
|
||||
#include "dbutils.h"
|
||||
#include "controldata.h"
|
||||
|
||||
#include "catalog/pg_control.h"
|
||||
|
||||
#include "dirutil.h"
|
||||
|
||||
/* mainly for use by repmgrd */
|
||||
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||
@@ -1239,6 +1239,60 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
|
||||
{
|
||||
bool can_use = true;
|
||||
|
||||
if (guc_set(conn, "full_page_writes", "=", "off"))
|
||||
{
|
||||
if (can_use == false)
|
||||
appendPQExpBuffer(reason, "; ");
|
||||
|
||||
appendPQExpBuffer(
|
||||
reason,
|
||||
_("\"full_page_writes\" must be set to \"on\""));
|
||||
|
||||
can_use = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* "wal_log_hints" off - are data checksums available?
|
||||
* Note: we're checking the local pg_control file here as the value will
|
||||
* be the same throughout the cluster and saves a round-trip to the
|
||||
* demotion candidate.
|
||||
*/
|
||||
if (guc_set(conn, "wal_log_hints", "=", "on") == false)
|
||||
{
|
||||
int data_checksum_version = get_data_checksum_version(data_directory);
|
||||
|
||||
if (data_checksum_version < 0)
|
||||
{
|
||||
if (can_use == false)
|
||||
appendPQExpBuffer(reason, "; ");
|
||||
|
||||
appendPQExpBuffer(
|
||||
reason,
|
||||
_("\"wal_log_hints\" is set to \"off\" but unable to determine checksum version"));
|
||||
can_use = false;
|
||||
}
|
||||
else if (data_checksum_version == 0)
|
||||
{
|
||||
if (can_use == false)
|
||||
appendPQExpBuffer(reason, "; ");
|
||||
|
||||
appendPQExpBuffer(
|
||||
reason,
|
||||
_("\"wal_log_hints\" is set to \"off\" and checksums are disabled"));
|
||||
|
||||
can_use = false;
|
||||
}
|
||||
}
|
||||
|
||||
return can_use;
|
||||
}
|
||||
|
||||
|
||||
/* ================ */
|
||||
/* result functions */
|
||||
/* ================ */
|
||||
|
||||
@@ -322,6 +322,7 @@ int get_server_version(PGconn *conn, char *server_version);
|
||||
RecoveryType get_recovery_type(PGconn *conn);
|
||||
int get_primary_node_id(PGconn *conn);
|
||||
bool get_replication_info(PGconn *conn, ReplInfo *replication_info);
|
||||
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||
|
||||
/* extension functions */
|
||||
ExtensionStatus get_repmgr_extension_status(PGconn *conn);
|
||||
|
||||
@@ -1563,7 +1563,8 @@ do_standby_follow(void)
|
||||
* we'll need the location of its configuration file; this
|
||||
* can be provided explicitly with -C/--remote-config-file,
|
||||
* otherwise repmgr will look in default locations on the
|
||||
* remote server
|
||||
* remote server (starting with the same path as the local
|
||||
* configuration file)
|
||||
*
|
||||
* TODO:
|
||||
* - make connection test timeouts/intervals configurable (see below)
|
||||
@@ -1695,6 +1696,31 @@ do_standby_switchover(void)
|
||||
|
||||
log_verbose(LOG_DEBUG, "remote node name is \"%s\"", remote_node_record.node_name);
|
||||
|
||||
|
||||
/*
|
||||
* If --force-rewind specified, check pg_rewind can be used
|
||||
*/
|
||||
|
||||
if (runtime_options.force_rewind == true)
|
||||
{
|
||||
PQExpBufferData reason;
|
||||
|
||||
initPQExpBuffer(&reason);
|
||||
|
||||
if (can_use_pg_rewind(remote_conn, config_file_options.data_directory, &reason) == false)
|
||||
{
|
||||
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
||||
log_detail("%s", reason.data);
|
||||
termPQExpBuffer(&reason);
|
||||
PQfinish(local_conn);
|
||||
PQfinish(remote_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&reason);
|
||||
}
|
||||
|
||||
PQfinish(local_conn);
|
||||
PQfinish(remote_conn);
|
||||
|
||||
|
||||
@@ -187,4 +187,5 @@ extern void make_remote_repmgr_path(PQExpBufferData *outputbuf);
|
||||
extern void get_server_action(t_server_action action, char *script, char *data_dir);
|
||||
extern bool data_dir_required_for_action(t_server_action action);
|
||||
extern void get_node_data_directory(char *data_dir_buf);
|
||||
#endif
|
||||
|
||||
#endif /* _REPMGR_CLIENT_GLOBAL_H_ */
|
||||
|
||||
Reference in New Issue
Block a user