repmgr node rejoin: add --dry-run option

This commit is contained in:
Ian Barwick
2017-10-25 11:01:58 +09:00
parent 9f5edd07ad
commit c5d91ca88c
2 changed files with 140 additions and 49 deletions

View File

@@ -43,17 +43,72 @@
<filename>repmgr.conf</filename> for the stopped node *must* be supplied explicitly if not
otherwise available.
</para>
<para>
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
node which has diverged from the rest of the cluster, typically a failed primary.
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
</para>
<note>
<para>
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
data checksums were enabled when the cluster was initialized. See the
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
<sect1 id="repmgr-node-rejoin-pg-rewind">
<title>Using <command>pg_rewind</command></title>
<para>
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
node which has diverged from the rest of the cluster, typically a failed primary.
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
</para>
</note>
<note>
<para>
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
data checksums were enabled when the cluster was initialized. See the
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
</para>
</note>
<para>
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
</para>
<para>
Be aware that if <command>pg_rewind</command> is executed and actually performs a
rewind operation, any configuration files in the PostgreSQL data directory will be
overwritten with those from the source server.
</para>
<para>
To prevent this happening, provide a comma-separated list of files to retain
using the <literal>--config-files</literal> command line option; the specified files
will be archived in a temporary directory (whose parent directory can be specified with
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
complete.
</para>
<para>
Example, first using <literal>--dry-run</literal>, then actually executing the
<command>node rejoin</command> command:
<programlisting>
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
NOTICE: using provided configuration file "/etc/repmgr.conf"
INFO: prerequisites for using pg_rewind are met
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.conf"
INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
INFO: pg_rewind would now be executed
DETAIL: pg_rewind command is:
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
<programlisting>
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
NOTICE: using provided configuration file "/etc/repmgr.conf"
INFO: prerequisites for using pg_rewind are met
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
NOTICE: executing pg_rewind
NOTICE: 2 files copied to /space/sda1/ibarwick/repmgr-test/node_1/data
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
INFO: deleting "recovery.done"
INFO: setting node 1's primary to node 2
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
waiting for server to start.... done
server started
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2</programlisting>
</para>
</sect1>
</chapter>

View File

@@ -1821,7 +1821,7 @@ do_node_rejoin(void)
/*
* Intended mainly for "internal" use by `node rejoin` on the local node when
* For "internal" use by `node rejoin` on the local node when
* called by "standby switchover" from the remote node.
*
* This archives any configuration files in the data directory, which may be
@@ -1865,8 +1865,6 @@ _do_node_archive_config(void)
termPQExpBuffer(&archive_dir);
exit(ERR_BAD_CONFIG);
}
}
else if (!S_ISDIR(statbuf.st_mode))
{
@@ -1876,7 +1874,6 @@ _do_node_archive_config(void)
exit(ERR_BAD_CONFIG);
}
arcdir = opendir(archive_dir.data);
if (arcdir == NULL)
@@ -1888,42 +1885,46 @@ _do_node_archive_config(void)
exit(ERR_BAD_CONFIG);
}
/*
* attempt to remove any existing files in the directory TODO: collate
* problem files into list
*/
while ((arcdir_ent = readdir(arcdir)) != NULL)
if (runtime_options.dry_run == false)
{
PQExpBufferData arcdir_ent_path;
initPQExpBuffer(&arcdir_ent_path);
appendPQExpBuffer(&arcdir_ent_path,
"%s/%s",
archive_dir.data,
arcdir_ent->d_name);
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
/*
* attempt to remove any existing files in the directory TODO: collate
* problem files into list
*/
while ((arcdir_ent = readdir(arcdir)) != NULL)
{
PQExpBufferData arcdir_ent_path;
initPQExpBuffer(&arcdir_ent_path);
appendPQExpBuffer(&arcdir_ent_path,
"%s/%s",
archive_dir.data,
arcdir_ent->d_name);
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
{
termPQExpBuffer(&arcdir_ent_path);
continue;
}
if (unlink(arcdir_ent_path.data) == -1)
{
log_error(_("unable to delete file in temporary archive directory"));
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
log_detail("%s", strerror(errno));
closedir(arcdir);
termPQExpBuffer(&arcdir_ent_path);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&arcdir_ent_path);
continue;
}
if (unlink(arcdir_ent_path.data) == -1)
{
log_error(_("unable to delete file in temporary archive directory"));
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
log_detail("%s", strerror(errno));
closedir(arcdir);
termPQExpBuffer(&arcdir_ent_path);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&arcdir_ent_path);
closedir(arcdir);
}
closedir(arcdir);
/*
* extract list of config files from --config-files
*/
@@ -1999,18 +2000,53 @@ _do_node_archive_config(void)
}
else
{
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
cell->key, dest_file.data);
copy_file(cell->value, dest_file.data);
copied_count++;
if (runtime_options.dry_run == true)
{
log_info("file \"%s\" would be copied to \"%s\"",
cell->key, dest_file.data);
copied_count++;
}
else
{
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
cell->key, dest_file.data);
copy_file(cell->value, dest_file.data);
copied_count++;
}
}
termPQExpBuffer(&dest_file);
}
if (runtime_options.dry_run == true)
{
log_verbose(LOG_INFO, _("%i files would have been copied to \"%s\""),
copied_count, archive_dir.data);
}
else
{
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
copied_count, archive_dir.data);
}
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
copied_count, archive_dir.data);
if (runtime_options.dry_run == true)
{
/*
* Delete directory in --dry-run mode - it should be empty unless it's been
* interfered with for some reason, in which case manual intervention is
* required
*/
if (rmdir(archive_dir.data) != 0 && errno != EEXIST)
{
log_warning(_("unable to delete directory \"%s\""), archive_dir.data);
log_detail("%s", strerror(errno));
log_hint(_("directory may need to be manually removed"));
}
else
{
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir.data);
}
}
termPQExpBuffer(&archive_dir);
}