"standby follow": simplify check when follow target has higher timeline

No need for a CHECKPOINT here, which simplifies things considerably.
2026-06-01 03:39:05 +00:00 · 2019-01-11 16:34:04 +09:00
parent b3c2831bd3
commit 028c874f81
2 changed files with 35 additions and 60 deletions
@@ -22,6 +22,17 @@
      default location; no additional arguments are required.
    </para>
 	<para>
 	  By default &repmgr; will attempt to attach the standby to the current primary.
 	  If <option>--upstream-node-id</option> is provided, &repmgr; will attempt
 	  to attach the standby to the specified node, which can be another standby.
 	</para>
    <para>
      This command will force a restart of the standby server, which must be
      running.
    </para>
 	<tip>
      <para>
 		To re-add an inactive node to the replication cluster, use
@@ -29,29 +40,25 @@
      </para>
 	</tip>
    <para>
      This command will force a restart of the standby server, which must be
      running. Additionally, in order to be able to verify whether the standby
      can attach to the upstream node, a <command>CHECKPOINT</command> will
      be executed - this requires superuser privileges, and will be executed
      even with the <option>--dry-run</option> option.
    </para>
    <important>
      <para>
        If the &repmgr; database user is not a superuser, it will not be possible
        to execute <command>CHECKPOINT</command>, meaning &repmgr; may not be
        able to determine whether the upstream node can be followed.
      </para>
    </important>
 	<para>
 	  <command>repmgr standby follow</command> will wait up to
 	  <varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
-	  to verify the standby has actually connected to the new primary.
+	  to verify the standby has actually connected to the new upstream node.
 	</para>
 	<note>
 	  <para>
 		If <option>recovery_min_apply_delay</option> is set for the standby, it
 		will not attach to the new upstream node until it has replayed available
 		WAL.
 	  </para>
 	  <para>
 		Conversely, if the standby is attached to another standby
 		with <option>recovery_min_apply_delay</option> set, that standby's replay
 		state may actually be behind that of its new downstream node.
 	  </para>
 	</note>
  </refsect1>
  <refsect1>
@@ -2633,8 +2633,6 @@ do_standby_follow(void)
 		else
 		{
 			XLogRecPtr local_xlogpos = get_current_lsn(local_conn);
 			bool can_follow = true;
 			XLogRecPtr local_min_recovery_location = InvalidXLogRecPtr;
 			/*
 			 * upstream has higher timeline - check where it forked off from this node's timeline
@@ -2650,8 +2648,6 @@ do_standby_follow(void)
 				exit(ERR_FOLLOW_FAIL);
 			}
 			local_min_recovery_location = get_min_recovery_location(config_file_options.data_directory);
 			/*
 			 * Local node has proceeded beyond the follow target's fork, so we
 			 * definitely can't attach.
@@ -2660,59 +2656,31 @@ do_standby_follow(void)
 			 * not contain all changes which are being replayed to this standby.
 			 */
 			if (local_xlogpos > follow_target_history->end)
 			{
 				can_follow = false;
 			}
 			else
 			{
 				/*
 				 * XXX can we establish what the window is where we *need* to execute
 				 * a CHECKPOINT?
 				 */
 				/*
 				 * Execute CHECKPOINT on the local node - we'll need this to update
 				 * the pg_control file so we can compare positions with the new upstream.
 				 * There is no way of avoiding this for --dry-run.
 				 */
 				if (is_superuser_connection(local_conn, NULL) == true)
 				{
 					log_notice(_("executing CHECKPOINT"));
 					checkpoint(local_conn);
 				}
 				else
 				{
 					log_warning(_("connection is not a superuser, unable to execute CHECKPOINT"));
 					log_detail(_("a CHECKPOINT is required in order to compare local and follow target states"));
 				}
 				log_debug("upstream tli: %i; branch LSN: %X/%X",
 						  follow_target_history->tli, format_lsn(follow_target_history->end));
 				if (follow_target_history->end < local_min_recovery_location)
 					can_follow = false;
 			}
 			if (can_follow == false)
 			{
 				log_error(_("this node cannot attach to follow target node %i"),
 						  follow_target_node_id);
 				log_detail(_("follow target server's timeline %i forked off current database system timeline %i before current recovery point %X/%X\n"),
 						   local_identification.timeline + 1,
 						   local_identification.timeline,
-						   format_lsn(local_min_recovery_location));
+						   format_lsn(local_xlogpos));
 				PQfinish(follow_target_conn);
 				PQfinish(follow_target_repl_conn);
 				PQfinish(local_conn);
 				exit(ERR_FOLLOW_FAIL);
 			}
 			if (runtime_options.dry_run == true)
 			{
 				log_info(_("local node %i can follow target node %i"),
 						 config_file_options.node_id,
 						 follow_target_node_id);
 				log_detail(_("local node's recovery point: %X/%X; follow target node's fork point: %X/%X"),
 						   format_lsn(local_xlogpos),
 						   format_lsn(follow_target_history->end));
 			}
 		}
 	}
 	PQfinish(local_conn);
 	PQfinish(follow_target_repl_conn);
 	if (runtime_options.dry_run == true)