From 19e0b6a1b68e1eb869ce115948e6a27eedea03db Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 31 Jan 2019 10:49:39 +0900 Subject: [PATCH] doc: update "node rejoin" documentation In particular, update examples to reflect changed output in repmgr 4.3. --- doc/repmgr-node-rejoin.sgml | 101 +++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/doc/repmgr-node-rejoin.sgml b/doc/repmgr-node-rejoin.sgml index 619ce2a8..fc1c9867 100644 --- a/doc/repmgr-node-rejoin.sgml +++ b/doc/repmgr-node-rejoin.sgml @@ -216,6 +216,10 @@ postgres --single -D /var/lib/pgsql/data/ < /dev/null + + &repmgr; will attempt to verify whether the node can rejoin as-is, or whether + pg_rewind must be used (see following section). + @@ -241,65 +245,92 @@ + + We strongly recommend familiarizing yourself with pg_rewind before attempting + to use it with &repmgr;, as while it is an extremely useful tool, it is not + a "magic bullet". + + + + A typical use-case for pg_rewind is when a scenario like the following + is encountered: + + $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \ + --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run + INFO: replication connection to the follow target node was successful + INFO: local and follow target system identifiers match + DETAIL: system identifier is 6652184002263212600 + ERROR: this node cannot attach to follow target node 3 + DETAIL: follow target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710 + HINT: use --force-rewind to execute pg_rewind + + Here, node3 was promoted to a primary while the local node was + still attached to the previous primary; this can potentially happen during e.g. a + network split. pg_rewind can re-sync the local node with node3, + removing the need for a full reclone. 
+ + To have repmgr node rejoin use pg_rewind, pass the command line option --force-rewind, which will tell &repmgr; to execute pg_rewind to ensure the node can be rejoined successfully. - - Be aware that if pg_rewind is executed and actually performs a - rewind operation, any configuration files in the PostgreSQL data directory will be - overwritten with those from the source server. - - - To prevent this happening, provide a comma-separated list of files to retain - using the --config-file command line option; the specified files - will be archived in a temporary directory (whose parent directory can be specified with - --config-archive-dir) and restored once the rewind operation is - complete. - + + + Be aware that if pg_rewind is executed and actually performs a + rewind operation, any configuration files in the PostgreSQL data directory will be + overwritten with those from the source server. + + + To prevent this happening, provide a comma-separated list of files to retain + using the --config-files command line option; the specified files + will be archived in a temporary directory (whose parent directory can be specified with + --config-archive-dir) and restored once the rewind operation is + complete. + + Example, first using --dry-run, then actually executing the node rejoin command. 
- $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \ - --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose --dry-run - NOTICE: using provided configuration file "/etc/repmgr.conf" + $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \ + --config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run + INFO: replication connection to the follow target node was successful + INFO: local and follow target system identifiers match + DETAIL: system identifier is 6652460429293670710 + NOTICE: pg_rewind execution required for this node to attach to follow target node 3 + DETAIL: follow target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710 INFO: prerequisites for using pg_rewind are met - INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf" - INFO: file "postgresql.conf" would be copied to "/tmp/repmgr-config-archive-node1/postgresql.local.conf" - INFO: 2 files would have been copied to "/tmp/repmgr-config-archive-node1" - INFO: directory "/tmp/repmgr-config-archive-node1" deleted + INFO: file "postgresql.local.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.local.conf" + INFO: file "postgresql.replication-setup.conf" would be copied to "/tmp/repmgr-config-archive-node2/postgresql.replication-setup.conf" INFO: pg_rewind would now be executed DETAIL: pg_rewind command is: - pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr' + pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr' + INFO: prerequisites for executing NODE REJOIN are met If --force-rewind is used with the --dry-run option, - this checks the prerequisites for using pg_rewind, but cannot - predict the outcome of actually executing pg_rewind. 
+ this checks the prerequisites for using pg_rewind, but is + not an absolute guarantee that actually executing pg_rewind + will succeed. - $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \ - --force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose - NOTICE: using provided configuration file "/etc/repmgr.conf" - INFO: prerequisites for using pg_rewind are met - INFO: 2 files copied to "/tmp/repmgr-config-archive-node1" + $ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \ + --config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind + NOTICE: pg_rewind execution required for this node to attach to follow target node 3 + DETAIL: follow target server's timeline 2 forked off current database system timeline 1 before current recovery point 0/610D710 NOTICE: executing pg_rewind - NOTICE: 2 files copied to /var/lib/pgsql/data - INFO: directory "/tmp/repmgr-config-archive-node1" deleted - INFO: deleting "recovery.done" - INFO: setting node 1's primary to node 2 - NOTICE: starting server using "pg_ctl-l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start" - waiting for server to start.... done - server started + DETAIL: pg_rewind command is "pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'" + NOTICE: 2 files copied to /var/lib/postgresql/data + NOTICE: setting node 2's upstream to node 3 + NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgresql/data' start" NOTICE: NODE REJOIN successful - DETAIL: node 1 is now attached to node 2 + DETAIL: node 2 is now attached to node 3