mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgr node rejoin: add --dry-run option
This commit is contained in:
@@ -43,17 +43,72 @@
|
||||
<filename>repmgr.conf</filename> for the stopped node *must* be supplied explicitly if not
|
||||
otherwise available.
|
||||
</para>
|
||||
<para>
|
||||
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
|
||||
node which has diverged from the rest of the cluster, typically a failed primary.
|
||||
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||
|
||||
<sect1 id="repmgr-node-rejoin-pg-rewind">
|
||||
<title>Using <command>pg_rewind</command></title>
|
||||
<para>
|
||||
<command>repmgr node rejoin</command> can optionally use <command>pg_rewind</command> to re-integrate a
|
||||
node which has diverged from the rest of the cluster, typically a failed primary.
|
||||
<command>pg_rewind</command> is available in PostgreSQL 9.5 and later.
|
||||
</para>
|
||||
</note>
|
||||
<note>
|
||||
<para>
|
||||
<command>pg_rewind</command> *requires* that either <varname>wal_log_hints</varname> is enabled, or that
|
||||
data checksums were enabled when the cluster was initialized. See the
|
||||
<ulink url="https://www.postgresql.org/docs/current/static/app-pgrewind.html"><command>pg_rewind</command> documentation</ulink> for details.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
|
||||
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||
overwritten with those from the source server.
|
||||
</para>
|
||||
<para>
|
||||
To prevent this happening, provide a comma-separated list of files to retain
|
||||
using the <literal>--config-files</literal> command line option; the specified files
|
||||
will be archived in a temporary directory (whose parent directory can be specified with
|
||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
||||
complete.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
||||
<literal>node rejoin</literal> command.
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf"
|
||||
INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.conf"
|
||||
INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1"
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: pg_rewind would now be executed
|
||||
DETAIL: pg_rewind command is:
|
||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
|
||||
NOTICE: using provided configuration file "/etc/repmgr.conf"
|
||||
INFO: prerequisites for using pg_rewind are met
|
||||
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
|
||||
NOTICE: executing pg_rewind
|
||||
NOTICE: 2 files copied to /space/sda1/ibarwick/repmgr-test/node_1/data
|
||||
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
|
||||
INFO: deleting "recovery.done"
|
||||
INFO: setting node 1's primary to node 2
|
||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||
waiting for server to start.... done
|
||||
server started
|
||||
NOTICE: NODE REJOIN successful
|
||||
DETAIL: node 1 is now attached to node 2</programlisting>
|
||||
</para>
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
@@ -1821,7 +1821,7 @@ do_node_rejoin(void)
|
||||
|
||||
|
||||
/*
|
||||
* Intended mainly for "internal" use by `node rejoin` on the local node when
|
||||
* For "internal" use by `node rejoin` on the local node when
|
||||
* called by "standby switchover" from the remote node.
|
||||
*
|
||||
* This archives any configuration files in the data directory, which may be
|
||||
@@ -1865,8 +1865,6 @@ _do_node_archive_config(void)
|
||||
termPQExpBuffer(&archive_dir);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
else if (!S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
@@ -1876,7 +1874,6 @@ _do_node_archive_config(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
arcdir = opendir(archive_dir.data);
|
||||
|
||||
if (arcdir == NULL)
|
||||
@@ -1888,42 +1885,46 @@ _do_node_archive_config(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* attempt to remove any existing files in the directory TODO: collate
|
||||
* problem files into list
|
||||
*/
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||
if (runtime_options.dry_run == false)
|
||||
{
|
||||
PQExpBufferData arcdir_ent_path;
|
||||
|
||||
initPQExpBuffer(&arcdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&arcdir_ent_path,
|
||||
"%s/%s",
|
||||
archive_dir.data,
|
||||
arcdir_ent->d_name);
|
||||
|
||||
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||
/*
|
||||
* attempt to remove any existing files in the directory TODO: collate
|
||||
* problem files into list
|
||||
*/
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||
{
|
||||
PQExpBufferData arcdir_ent_path;
|
||||
|
||||
initPQExpBuffer(&arcdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&arcdir_ent_path,
|
||||
"%s/%s",
|
||||
archive_dir.data,
|
||||
arcdir_ent->d_name);
|
||||
|
||||
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
||||
{
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlink(arcdir_ent_path.data) == -1)
|
||||
{
|
||||
log_error(_("unable to delete file in temporary archive directory"));
|
||||
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
closedir(arcdir);
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlink(arcdir_ent_path.data) == -1)
|
||||
{
|
||||
log_error(_("unable to delete file in temporary archive directory"));
|
||||
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
closedir(arcdir);
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&arcdir_ent_path);
|
||||
closedir(arcdir);
|
||||
}
|
||||
|
||||
closedir(arcdir);
|
||||
|
||||
/*
|
||||
* extract list of config files from --config-files
|
||||
*/
|
||||
@@ -1999,18 +2000,53 @@ _do_node_archive_config(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copy_file(cell->value, dest_file.data);
|
||||
copied_count++;
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info("file \"%s\" would be copied to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copied_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
||||
cell->key, dest_file.data);
|
||||
copy_file(cell->value, dest_file.data);
|
||||
copied_count++;
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&dest_file);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_verbose(LOG_INFO, _("%i files would have been copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
}
|
||||
|
||||
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
|
||||
copied_count, archive_dir.data);
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
/*
|
||||
* Delete directory in --dry-run mode - it should be empty unless it's been
|
||||
* interfered with for some reason, in which case manual intervention is
|
||||
* required
|
||||
*/
|
||||
if (rmdir(archive_dir.data) != 0 && errno != EEXIST)
|
||||
{
|
||||
log_warning(_("unable to delete directory \"%s\""), archive_dir.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_hint(_("directory may need to be manually removed"));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir.data);
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&archive_dir);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user