Enable pg_rewind to be used with PostgreSQL 9.3/9.4

pg_rewind is not part of the core distribution for those, but we
provided support in repmgr 3.3 so should extend it to repmgr 4.

Note that there is no check in place whether the pg_rewind binary
exists, so it's up to the user to ensure it's present.

Addresses GitHub #413.
This commit is contained in:
Ian Barwick
2018-03-30 16:04:00 +09:00
committed by Ian Barwick
parent 94d26dbe9f
commit 6a1797cadd
9 changed files with 180 additions and 94 deletions

View File

@@ -1342,67 +1342,6 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
}
bool
can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
{
bool can_use = true;
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
server_version_num = get_server_version(conn, NULL);
if (server_version_num < 90500)
{
appendPQExpBuffer(reason,
_("pg_rewind available from PostgreSQL 9.5"));
return false;
}
if (guc_set(conn, "full_page_writes", "=", "off"))
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"full_page_writes\" must be set to \"on\""));
can_use = false;
}
/*
* "wal_log_hints" off - are data checksums available? Note: we're
* checking the local pg_control file here as the value will be the same
* throughout the cluster and saves a round-trip to the demotion
* candidate.
*/
if (guc_set(conn, "wal_log_hints", "=", "on") == false)
{
int data_checksum_version = get_data_checksum_version(data_directory);
if (data_checksum_version < 0)
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version"));
can_use = false;
}
else if (data_checksum_version == 0)
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"wal_log_hints\" is set to \"off\" and data checksums are disabled"));
can_use = false;
}
}
return can_use;
}
int
get_ready_archive_files(PGconn *conn, const char *data_directory)
{

View File

@@ -387,7 +387,6 @@ bool get_cluster_size(PGconn *conn, char *size);
int get_server_version(PGconn *conn, char *server_version);
RecoveryType get_recovery_type(PGconn *conn);
int get_primary_node_id(PGconn *conn);
bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
int get_ready_archive_files(PGconn *conn, const char *data_directory);
bool identify_system(PGconn *repl_conn, t_system_identification *identification);
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);

View File

@@ -84,11 +84,14 @@
</varlistentry>
<varlistentry>
<term><option>--force-rewind</option></term>
<term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
<listitem>
<para>
Use <application>pg_rewind</application> to reintegrate the old primary if necessary
(PostgreSQL 9.5 and later).
(and the prerequisites for using <application>pg_rewind</application> are met).
If using PostgreSQL 9.3 or 9.4, and the <application>pg_rewind</application>
binary is not installed in the PostgreSQL <filename>bin</filename> directory,
provide its full path. For more details see also <xref linkend="switchover-pg-rewind">.
</para>
</listitem>
</varlistentry>

View File

@@ -179,6 +179,46 @@
</simpara>
</note>
<sect2 id="switchover-pg-rewind" xreflabel="Switchover and pg_rewind">
<title>Switchover and pg_rewind</title>
<para>
If the demotion candidate does not shut down smoothly or cleanly, there's a risk it
will have a slightly divergent timeline and will not be able to attach to the new
primary. To fix this situation without needing to reclone the old primary, it's
possible to use the <application>pg_rewind</application> utility, which will usually be
able to resync the two servers.
</para>
<para>
To have &repmgr; execute <application>pg_rewind</application> if it detects this
situation after promoting the new primary, add the <option>--force-rewind</option>
option.
</para>
<note>
<simpara>
If &repmgr; detects a situation where it needs to execute <application>pg_rewind</application>,
it will execute a <literal>CHECKPOINT</literal> on the new primary before executing
<application>pg_rewind</application>.
</simpara>
</note>
<para>
For more details on <application>pg_rewind</application>, see:
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html">https://www.postgresql.org/docs/current/static/app-pgrewind.html</ulink>.
</para>
<para>
<application>pg_rewind</application> has been part of the core PostgreSQL distribution since
version 9.5. Users of versions 9.3 and 9.4 will need to manually install; source code available here:
<ulink url="https://github.com/vmware/pg_rewind">https://github.com/vmware/pg_rewind</ulink>.
If the <application>pg_rewind</application>
binary is not installed in the PostgreSQL <filename>bin</filename> directory, provide
its full path on the demotion candidate with <option>--force-rewind</option>.
</para>
<para>
Note that building the 9.3/9.4 version of <application>pg_rewind</application> requires the PostgreSQL
source code. Also, PostgreSQL 9.3 does not provide <varname>wal_log_hints</varname>,
meaning data checksums must have been enabled when the database was initialized.
</para>
</sect2>
</sect1>

View File

@@ -1728,7 +1728,7 @@ do_node_rejoin(void)
{
log_error(_("database is not shut down cleanly"));
if (runtime_options.force_rewind == true)
if (runtime_options.force_rewind_used == true)
{
log_detail(_("pg_rewind will not be able to run"));
}
@@ -1779,7 +1779,7 @@ do_node_rejoin(void)
* archived
*/
if (runtime_options.force_rewind == true)
if (runtime_options.force_rewind_used == true)
{
PQExpBufferData reason;
PQExpBufferData msg;
@@ -1816,7 +1816,7 @@ do_node_rejoin(void)
* Forcibly rewind node if requested (this is mainly for use when this
* action is being executed by "repmgr standby switchover")
*/
if (runtime_options.force_rewind == true && runtime_options.dry_run == false)
if (runtime_options.force_rewind_used == true && runtime_options.dry_run == false)
{
int ret;
PQExpBufferData filebuf;
@@ -1826,9 +1826,18 @@ do_node_rejoin(void)
/* execute pg_rewind */
initPQExpBuffer(&command);
appendPQExpBuffer(&command,
"%s -D ",
make_pg_path("pg_rewind"));
if (runtime_options.force_rewind_path[0] != '\0')
{
appendPQExpBuffer(&command,
"%s -D ",
runtime_options.force_rewind_path);
}
else
{
appendPQExpBuffer(&command,
"%s -D ",
make_pg_path("pg_rewind"));
}
appendShellString(&command,
config_file_options.data_directory);
@@ -2438,15 +2447,15 @@ do_node_help(void)
puts("");
printf(_(" Configuration file required, runs on local node only.\n"));
puts("");
printf(_(" --csv emit output as CSV\n"));
printf(_(" --nagios emit output in Nagios format (individual status output only)\n"));
printf(_(" --csv emit output as CSV\n"));
printf(_(" --nagios emit output in Nagios format (individual status output only)\n"));
puts("");
printf(_(" Following options check an individual status:\n"));
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
printf(_(" --downstream whether all downstream nodes are connected\n"));
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n"));
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
printf(_(" --downstream whether all downstream nodes are connected\n"));
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n"));
puts("");
@@ -2456,13 +2465,15 @@ do_node_help(void)
puts("");
printf(_(" Configuration file required, runs on local node only.\n"));
puts("");
printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \
" (including usability of \"pg_rewind\" if requested)\n"));
printf(_(" --force-rewind execute \"pg_rewind\" if necessary\n"));
printf(_(" --config-files comma-separated list of configuration files to retain\n" \
" after executing \"pg_rewind\"\n"));
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
" (default: /tmp)\n"));
printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \
" (including usability of \"pg_rewind\" if requested)\n"));
printf(_(" --force-rewind[=VALUE] execute \"pg_rewind\" if necessary\n"));
printf(_(" (9.3 and 9.4 - provide full \"pg_rewind\" path)\n"));
printf(_(" --config-files comma-separated list of configuration files to retain\n" \
" after executing \"pg_rewind\"\n"));
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
" (default: /tmp)\n"));
puts("");
printf(_("NODE SERVICE\n"));

View File

@@ -2771,7 +2771,7 @@ do_standby_switchover(void)
* archived
*/
if (runtime_options.force_rewind == true)
if (runtime_options.force_rewind_used == true)
{
PQExpBufferData reason;
PQExpBufferData msg;
@@ -3578,7 +3578,7 @@ do_standby_switchover(void)
* in pg_control, which is written at the first checkpoint, which might not
* occur immediately.
*/
if (runtime_options.force_rewind == true)
if (runtime_options.force_rewind_used == true)
{
log_notice(_("issuing CHECKPOINT"));
checkpoint(local_conn);
@@ -3595,7 +3595,7 @@ do_standby_switchover(void)
KeyValueListCell *cell = NULL;
bool first_entry = true;
if (runtime_options.force_rewind == false)
if (runtime_options.force_rewind_used == false)
{
log_error(_("new primary diverges from former primary and --force-rewind not provided"));
log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\""));
@@ -3605,7 +3605,16 @@ do_standby_switchover(void)
}
appendPQExpBuffer(&node_rejoin_options,
" --force-rewind --config-files=");
" --force-rewind");
if (runtime_options.force_rewind_path[0] != '\0')
{
appendPQExpBuffer(&node_rejoin_options,
"=%s",
runtime_options.force_rewind_path);
}
appendPQExpBuffer(&node_rejoin_options,
" --config-files=");
for (cell = remote_config_files.head; cell; cell = cell->next)
{
@@ -6315,7 +6324,9 @@ do_standby_help(void)
printf(_(" --always-promote promote standby even if behind original primary\n"));
printf(_(" --dry-run perform checks etc. but don't actually execute switchover\n"));
printf(_(" -F, --force ignore warnings and continue anyway\n"));
printf(_(" --force-rewind 9.5 and later - use pg_rewind to reintegrate the old primary if necessary\n"));
printf(_(" --force-rewind[=VALUE] use \"pg_rewind\" to reintegrate the old primary if necessary\n"));
printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n"));
printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
printf(_(" --siblings-follow have other standbys follow new primary\n"));

View File

@@ -92,7 +92,8 @@ typedef struct
/* "standby switchover" options */
bool always_promote;
bool force_rewind;
bool force_rewind_used;
char force_rewind_path[MAXPGPATH];
bool siblings_follow;
/* "node status" options */
@@ -152,7 +153,7 @@ typedef struct
/* "standby register" options */ \
false, 0, DEFAULT_WAIT_START, \
/* "standby switchover" options */ \
false, false, false, \
false, false, "", false, \
/* "node status" options */ \
false, \
/* "node check" options */ \
@@ -231,5 +232,6 @@ extern void get_server_action(t_server_action action, char *script, char *data_d
extern bool data_dir_required_for_action(t_server_action action);
extern void get_node_data_directory(char *data_dir_buf);
extern void init_node_record(t_node_info *node_record);
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
#endif /* _REPMGR_CLIENT_GLOBAL_H_ */

View File

@@ -53,6 +53,7 @@
#include "repmgr.h"
#include "compat.h"
#include "controldata.h"
#include "repmgr-client.h"
#include "repmgr-client-global.h"
#include "repmgr-action-primary.h"
@@ -421,7 +422,13 @@ main(int argc, char **argv)
break;
case OPT_FORCE_REWIND:
runtime_options.force_rewind = true;
runtime_options.force_rewind_used = true;
if (optarg != NULL)
{
strncpy(runtime_options.force_rewind_path, optarg, MAXPGPATH);
}
break;
case OPT_SIBLINGS_FOLLOW:
@@ -1605,7 +1612,7 @@ check_cli_parameters(const int action)
}
}
if (runtime_options.force_rewind == true)
if (runtime_options.force_rewind_used == true)
{
switch (action)
{
@@ -2749,3 +2756,77 @@ init_node_record(t_node_info *node_record)
create_slot_name(node_record->slot_name, config_file_options.node_id);
}
}
bool
can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason)
{
bool can_use = true;
int server_version_num = get_server_version(conn, NULL);
/* wal_log_hints not available in 9.3, so just determine if data checksums enabled */
if (server_version_num < 90400)
{
int data_checksum_version = get_data_checksum_version(data_directory);
if (data_checksum_version < 0)
{
appendPQExpBuffer(reason,
_("unable to determine data checksum version"));
can_use = false;
}
else if (data_checksum_version == 0)
{
appendPQExpBuffer(reason,
_("this cluster was initialised without data checksums"));
can_use = false;
}
return can_use;
}
/* "full_page_writes" must be on in any case */
if (guc_set(conn, "full_page_writes", "=", "off"))
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"full_page_writes\" must be set to \"on\""));
can_use = false;
}
/*
* "wal_log_hints" off - are data checksums available? Note: we're
* checking the local pg_control file here as the value will be the same
* throughout the cluster and saves a round-trip to the demotion
* candidate.
*/
if (guc_set(conn, "wal_log_hints", "=", "on") == false)
{
int data_checksum_version = get_data_checksum_version(data_directory);
if (data_checksum_version < 0)
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"wal_log_hints\" is set to \"off\" but unable to determine data checksum version"));
can_use = false;
}
else if (data_checksum_version == 0)
{
if (can_use == false)
appendPQExpBuffer(reason, "; ");
appendPQExpBuffer(reason,
_("\"wal_log_hints\" is set to \"off\" and data checksums are disabled"));
can_use = false;
}
}
return can_use;
}

View File

@@ -168,7 +168,7 @@ static struct option long_options[] =
/* "node rejoin" options */
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
{"config-archive-dir", required_argument, NULL, OPT_CONFIG_ARCHIVE_DIR},
{"force-rewind", no_argument, NULL, OPT_FORCE_REWIND},
{"force-rewind", optional_argument, NULL, OPT_FORCE_REWIND},
/* "node service" options */
{"action", required_argument, NULL, OPT_ACTION},