From df425a38b7201339f2d64aa4e94ee86f7174b73a Mon Sep 17 00:00:00 2001
From: Ian Barwick <ian@2ndquadrant.com>
Date: Wed, 9 Aug 2017 13:26:27 +0900
Subject: [PATCH] Refactor "standby follow" functionality

"standby follow" was originally co-opted to start up a demoted node;
this functionality is now delegated to "node rejoin", with the core
functionality of "standby follow" implemented as an internal function.
---
 repmgr-action-node.c    | 165 +++++++++++++++++----------
 repmgr-action-standby.c | 244 ++++++++++++++++------------------------
 repmgr-action-standby.h |   1 +
 repmgr-client.c         |   9 ++
 4 files changed, 215 insertions(+), 204 deletions(-)

diff --git a/repmgr-action-node.c b/repmgr-action-node.c
index 55c8e8a0..b0cf4679 100644
--- a/repmgr-action-node.c
+++ b/repmgr-action-node.c
@@ -17,6 +17,7 @@
 
 #include "repmgr-client-global.h"
 #include "repmgr-action-node.h"
+#include "repmgr-action-standby.h"
 
 static bool copy_file(const char *src_file, const char *dest_file);
 static void format_archive_dir(char *archive_dir);
@@ -916,14 +917,18 @@ parse_server_action(const char *action_name)
 void
 do_node_rejoin(void)
 {
-	PQExpBufferData command;
-	PQExpBufferData command_output;
-	struct stat statbuf;
-	char filebuf[MAXPGPATH];
+	PGconn *upstream_conn = NULL;
+	RecoveryType upstream_recovery_type = RECTYPE_UNKNOWN;
 	DBState db_state;
 	PGPing status;
 	bool is_shutdown = true;
 
+	PQExpBufferData command;
+	PQExpBufferData command_output;
+	struct stat statbuf;
+	char filebuf[MAXPGPATH] = "";
+	t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
+
 	/* check node is not actually running */
 
 	status = PQping(config_file_options.conninfo);
@@ -950,75 +955,122 @@ do_node_rejoin(void)
 	{
 		log_error(_("database is still running in state \"%s\""),
 				  describe_db_state(db_state));
+		log_hint(_("\"repmgr node rejoin\" cannot be executed on a running node"));
 		exit(ERR_BAD_CONFIG);
 	}
 
+	/* check if cleanly shut down */
 	if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
 	{
-		log_error(_("database is not shut down cleanly, pg_rewind will not be able to run"));
+		log_error(_("database is not shut down cleanly"));
+
+		if (runtime_options.force_rewind == true)
+		{
+			log_detail(_("pg_rewind will not be able to run"));
+		}
+		log_hint(_("database should be restarted and shut down cleanly after crash recovery completes"));
 		exit(ERR_BAD_CONFIG);
 	}
 
-	// XXX check if cleanly shut down, pg_rewind will fail if so
 
+	/* check provided upstream connection */
+	upstream_conn = establish_db_connection(runtime_options.upstream_conninfo, true);
 
-	// XXX we can probably make this an internal function
-	do_node_archive_config();
-
-
-	/* execute pg_rewind */
-	initPQExpBuffer(&command);
-
-	appendPQExpBuffer(
-		&command,
-		"%s -D ",
-		make_pg_path("pg_rewind"));
-
-	appendShellString(
-		&command,
-		config_file_options.data_directory);
-
-	appendPQExpBuffer(
-		&command,
-		" --source-server='%s'",
-		runtime_options.upstream_conninfo);
-
-	log_notice(_("executing pg_rewind"));
-	log_debug("pg_rewind command is:\n  %s",
-			  command.data);
-
-	initPQExpBuffer(&command_output);
-
-	// XXX handle failure
-
-	(void)local_command(
-		command.data,
-		&command_output);
-
-	termPQExpBuffer(&command_output);
-	termPQExpBuffer(&command);
-
-	/* Restore any previously archived config files */
-	do_node_restore_config();
-
-
-	/* remove any recovery.done file copied in by pg_rewind */
-	snprintf(filebuf, MAXPGPATH,
-			 "%s/recovery.done",
-			 config_file_options.data_directory);
-
-	if (stat(filebuf, &statbuf) == 0)
+	if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
 	{
-		log_verbose(LOG_INFO, _("deleting \"recovery.done\""));
+		log_error(_("unable to retrieve primary node record"));
+		PQfinish(upstream_conn);
+		exit(ERR_BAD_CONFIG);
+	}
 
-		if (unlink(filebuf) == -1)
+	PQfinish(upstream_conn);
+
+	/* connect to registered primary and check it's not in recovery */
+	upstream_conn = establish_db_connection(primary_node_record.conninfo, true);
+
+	upstream_recovery_type = get_recovery_type(upstream_conn);
+
+	if (upstream_recovery_type != RECTYPE_PRIMARY)
+	{
+		log_error(_("primary server is registered node \"%s\" (ID: %i), but server is not a primary"),
+				  primary_node_record.node_name,
+				  primary_node_record.node_id);
+		/* TODO: hint about checking cluster */
+		PQfinish(upstream_conn);
+
+		exit(ERR_BAD_CONFIG);
+	}
+
+	/*
+	 * Forcibly rewind node if requested (this is mainly for use when
+	 * this action is being executed by "repmgr standby switchover")
+	 */
+	if (runtime_options.force_rewind == true)
+	{
+		int ret;
+
+		// XXX we can probably make this an internal function
+		do_node_archive_config();
+
+		/* execute pg_rewind */
+		initPQExpBuffer(&command);
+
+		appendPQExpBuffer(
+			&command,
+			"%s -D ",
+			make_pg_path("pg_rewind"));
+
+		appendShellString(
+			&command,
+			config_file_options.data_directory);
+
+		appendPQExpBuffer(
+			&command,
+			" --source-server='%s'",
+			runtime_options.upstream_conninfo);
+
+		log_notice(_("executing pg_rewind"));
+		log_debug("pg_rewind command is:\n  %s",
+				  command.data);
+
+		initPQExpBuffer(&command_output);
+
+		ret = local_command(
+			command.data,
+			&command_output);
+
+		termPQExpBuffer(&command_output);
+		termPQExpBuffer(&command);
+
+		if (ret != 0)
 		{
-			log_warning(_("unable to delete \"%s\""),
-						filebuf);
-			log_detail("%s", strerror(errno));
+			log_error(_("unable to execute pg_rewind"));
+			log_detail(_("see preceding output for details"));
+			exit(ERR_BAD_CONFIG);
+		}
+		/* Restore any previously archived config files */
+		do_node_restore_config();
+
+		/* remove any recovery.done file copied in by pg_rewind */
+		snprintf(filebuf, MAXPGPATH,
+				 "%s/recovery.done",
+				 config_file_options.data_directory);
+
+		if (stat(filebuf, &statbuf) == 0)
+		{
+			log_verbose(LOG_INFO, _("deleting \"recovery.done\""));
+
+			if (unlink(filebuf) == -1)
+			{
+				log_warning(_("unable to delete \"%s\""),
+							filebuf);
+				log_detail("%s", strerror(errno));
+			}
 		}
 	}
 
+	do_standby_follow_internal(upstream_conn, &primary_node_record);
+
 }
 
 /*
diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c
index 3ce89034..15736aac 100644
--- a/repmgr-action-standby.c
+++ b/repmgr-action-standby.c
@@ -1211,127 +1211,83 @@ _do_standby_promote_internal(const char *data_dir)
 /*
  * Follow a new primary.
  *
- * This function has two "modes":
- *  1) no primary info provided - determine primary from standby metadata
- *  2) primary info provided - use that info to connect to the primary.
- *
- * (2) is mainly for when a node has been stopped as part of a switchover
- * and needs to be started with recovery.conf correctly configured.
+ * Node must be running. To start an inactive node and point it at a
+ * new primary, use "repmgr node rejoin".
  */
 
 void
 do_standby_follow(void)
 {
-	PGconn	   *local_conn;
-	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
-	int			original_upstream_node_id = UNKNOWN_NODE_ID;
+	PGconn	   *local_conn = NULL;
 
 	PGconn	   *primary_conn = NULL;
 	int			primary_id = UNKNOWN_NODE_ID;
 	t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
 
-	char		data_dir[MAXPGPATH];
-	t_conninfo_param_list recovery_conninfo;
-	char	   *errmsg = NULL;
-
-	RecordStatus record_status;
-
-	char		restart_command[MAXLEN];
-	int			r;
-
-	PQExpBufferData event_details;
+	int	    	timer;
 
 	log_verbose(LOG_DEBUG, "do_standby_follow()");
 
+
+	local_conn = establish_db_connection(config_file_options.conninfo, true);
+
+	log_verbose(LOG_INFO, _("connected to local node"));
+
+	/* check this is a standby */
+	check_recovery_type(local_conn);
+
 	/*
-	 * If -h/--host wasn't provided, attempt to connect to standby
-	 * to determine primary, and carry out some other checks while we're
-	 * at it.
-	 */
-	if (runtime_options.host_param_provided == false)
-	{
-		bool	    success;
-		int	    	timer;
-
-		local_conn = establish_db_connection(config_file_options.conninfo, true);
-
-		log_verbose(LOG_INFO, _("connected to local node"));
-
-		check_recovery_type(local_conn);
-
-		success = get_pg_setting(local_conn, "data_directory", data_dir);
-
-		if (success == false)
-		{
-			log_error(_("unable to determine data directory"));
-			PQfinish(local_conn);
-			exit(ERR_BAD_CONFIG);
-		}
-
-		/*
-		 * Attempt to connect to primary.
-		 *
-		 * If --wait provided, loop for up `primary_response_timeout`
-		 * seconds before giving up
-		 */
-		// XXX ??? primary_follow_timeout
-		for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
-		{
-			primary_conn = get_primary_connection_quiet(local_conn,
-														&primary_id,
-														NULL);
-
-			if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
-			{
-				break;
-			}
-		}
-
-		if (PQstatus(primary_conn) != CONNECTION_OK)
-		{
-			log_error(_("unable to determine primary node"));
-			PQfinish(local_conn);
-			exit(ERR_BAD_CONFIG);
-		}
-
-		check_primary_standby_version_match(local_conn, primary_conn);
-
-		PQfinish(local_conn);
-	}
-	/* local data directory and primary server info explictly provided -
-	 * attempt to connect to that
+	 * Attempt to connect to primary.
 	 *
-	 * XXX --wait option won't be effective here
+	 * If --wait provided, loop for up to `primary_follow_timeout`
+	 * seconds before giving up
 	 */
-	else
+	// XXX ??? primary_follow_timeout
+	for (timer = 0; timer < config_file_options.primary_follow_timeout; timer++)
 	{
-		if (config_file_options.data_directory[0] == '\0')
-		{
-			if (runtime_options.data_dir[0] == '\0')
-			{
-				log_error(_("-D/--pgdata required when providing connection parameters for \"standby follow\""));
-				exit(ERR_BAD_CONFIG);
-			}
-			strncpy(data_dir, runtime_options.data_dir, MAXPGPATH);
-		}
-		else
-		{
-			strncpy(data_dir, config_file_options.data_directory, MAXPGPATH);
-		}
+		primary_conn = get_primary_connection_quiet(local_conn,
+													&primary_id,
+													NULL);
 
-		primary_conn = establish_db_connection_by_params(&source_conninfo, true);
-
-		primary_id = get_primary_node_id(primary_conn);
+		if (PQstatus(primary_conn) == CONNECTION_OK || runtime_options.wait == false)
+		{
+			break;
+		}
 	}
 
-	if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
+	if (PQstatus(primary_conn) != CONNECTION_OK)
 	{
-		log_error(_("the node to follow is not a primary"));
-		// XXX log detail
-		PQfinish(primary_conn);
+		log_error(_("unable to determine primary node"));
+		PQfinish(local_conn);
 		exit(ERR_BAD_CONFIG);
 	}
 
+	check_primary_standby_version_match(local_conn, primary_conn);
+
+	PQfinish(local_conn);
+
+	get_node_record(primary_conn, primary_id, &primary_node_record);
+
+	do_standby_follow_internal(primary_conn, &primary_node_record);
+}
+
+
+/*
+ * Perform the actual "follow" operation; this is executed by
+ * "node rejoin" too.
+ */
+void
+do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record)
+{
+	t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
+	int			original_upstream_node_id = UNKNOWN_NODE_ID;
+	char		restart_command[MAXLEN];
+	int			r;
+
+	RecordStatus record_status;
+	PQExpBufferData event_details;
+	char	   *errmsg = NULL;
+
 
 	/*
 	 * Fetch our node record so we can write application_name, if set,
@@ -1383,20 +1339,19 @@ do_standby_follow(void)
 		termPQExpBuffer(&event_details);
 	}
 
-	get_node_record(primary_conn, primary_id, &primary_node_record);
+
 
 	/* Initialise connection parameters to write as `primary_conninfo` */
 	initialize_conninfo_params(&recovery_conninfo, false);
 
 	/* We ignore any application_name set in the primary's conninfo */
-	parse_conninfo_string(primary_node_record.conninfo, &recovery_conninfo, errmsg, true);
-
+	parse_conninfo_string(primary_node_record->conninfo, &recovery_conninfo, errmsg, true);
 
 	/* Set the default application name to this node's name */
 	param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
 
 	/* Set the replication user from the primary node record */
-	param_set(&recovery_conninfo, "user", primary_node_record.repluser);
+	param_set(&recovery_conninfo, "user", primary_node_record->repluser);
 
 
 	{
@@ -1431,14 +1386,14 @@ do_standby_follow(void)
 		}
 		else
 		{
-			original_upstream_node_id = primary_id;
+			original_upstream_node_id = primary_node_record->node_id;
 		}
 	}
 
 	log_info(_("changing node %i's primary to node %i"),
-			 config_file_options.node_id, primary_id);
+			 config_file_options.node_id, primary_node_record->node_id);
 
-	if (!create_recovery_file(&local_node_record, &recovery_conninfo, data_dir))
+	if (!create_recovery_file(&local_node_record, &recovery_conninfo, config_file_options.data_directory))
 	{
 		PQfinish(primary_conn);
 		exit(ERR_BAD_CONFIG);
@@ -1449,7 +1404,7 @@ do_standby_follow(void)
 	// XXX here check if service is running!! if not, start
 	//     ensure that problem with pg_ctl output is caught here
 
-	get_server_action(ACTION_RESTART, restart_command, data_dir);
+	get_server_action(ACTION_RESTART, restart_command, config_file_options.data_directory);
 
 	log_notice(_("restarting server using '%s'"),
 			   restart_command);
@@ -1474,6 +1429,7 @@ do_standby_follow(void)
 	{
 		t_node_info upstream_node_record  = T_NODE_INFO_INITIALIZER;
 		RecordStatus upstream_record_status;
+		PGconn	   *local_conn;
 
 		log_verbose(LOG_INFO, "attempting to remove replication slot from old upstream node %i",
 					original_upstream_node_id);
@@ -1518,7 +1474,7 @@ do_standby_follow(void)
 	if (update_node_record_status(primary_conn,
 								  config_file_options.node_id,
 								  "standby",
-								  primary_id,
+								  primary_node_record->node_id,
 								  true) == false)
 	{
 		log_error(_("unable to update upstream node"));
@@ -1527,12 +1483,13 @@ do_standby_follow(void)
 		exit(ERR_BAD_CONFIG);
 	}
 
+	// XXX return to caller
 	log_notice(_("STANDBY FOLLOW successful"));
 
 	initPQExpBuffer(&event_details);
 	appendPQExpBuffer(&event_details,
 					  _("node %i is now attached to node %i"),
-					  config_file_options.node_id, primary_id);
+					  config_file_options.node_id, primary_node_record->node_id);
 
 	create_event_notification(primary_conn,
 							  &config_file_options,
@@ -1591,6 +1548,7 @@ do_standby_switchover(void)
 	RecoveryType	recovery_type;
 	PQExpBufferData remote_command_str;
 	PQExpBufferData command_output;
+	PQExpBufferData node_rejoin_options;
 
 	int r, i;
 
@@ -2160,7 +2118,13 @@ do_standby_switchover(void)
 	/* promote standby */
 	_do_standby_promote_internal(config_file_options.data_directory);
 
-	if (1 || replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
+	/*
+	 * Execute `repmgr node rejoin` to create recovery.conf and start
+	 * the remote server. Additionally execute "pg_rewind", if required
+	 * and requested.
+	 */
+	initPQExpBuffer(&node_rejoin_options);
+	if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
 	{
 		KeyValueListCell *cell;
 		bool first_entry = true;
@@ -2170,60 +2134,46 @@ do_standby_switchover(void)
 			log_error(_("new primary diverges from former primary and --force-rewind not provided"));
 			/* TODO: "repmgr node rejoin" example, when available */
 			log_hint(_("the former primary will need to be restored manually"));
+			termPQExpBuffer(&node_rejoin_options);
 			PQfinish(local_conn);
 			exit(ERR_SWITCHOVER_FAIL);
 		}
 
-		initPQExpBuffer(&remote_command_str);
-		make_remote_repmgr_path(&remote_command_str);
-		appendPQExpBuffer(&remote_command_str,
-						  "node rejoin --upstream-conninfo='%s'",
-						  local_node_record.conninfo);
-		appendPQExpBuffer(&remote_command_str,
-						  " --config-files=");
+		appendPQExpBuffer(&node_rejoin_options,
+						  " --force-rewind --config-files=");
 
 		for (cell = remote_config_files.head; cell; cell = cell->next)
 		{
 			if (first_entry == false)
-				appendPQExpBuffer(&remote_command_str, ",");
+				appendPQExpBuffer(&node_rejoin_options, ",");
 			else
 				first_entry = false;
 
-			appendPQExpBuffer(&remote_command_str, "%s", cell->key);
+			appendPQExpBuffer(&node_rejoin_options, "%s", cell->key);
 		}
 
-		log_debug("executing:\n  \"%s\"", remote_command_str.data);
-
-		(void)remote_command(
-			remote_host,
-			runtime_options.remote_user,
-			remote_command_str.data,
-			NULL);
-
-		termPQExpBuffer(&remote_command_str);
+		appendPQExpBuffer(&node_rejoin_options, " ");
 	}
-	else
-	{
-		/*
-		 * Execute `repmgr standby follow` to create recovery.conf and start
-		 * the remote server
-		 *
-		 * XXX replace with "node rejoin"
-		 */
-		initPQExpBuffer(&remote_command_str);
-		make_remote_repmgr_path(&remote_command_str);
-		appendPQExpBuffer(&remote_command_str,
-						  " -d \\'%s\\' standby follow",
-						  local_node_record.conninfo);
-		log_debug("executing:\n  \"%s\"", remote_command_str.data);
-		(void)remote_command(
-			remote_host,
-			runtime_options.remote_user,
-			remote_command_str.data,
-			NULL);
 
-		termPQExpBuffer(&remote_command_str);
-	}
+	initPQExpBuffer(&remote_command_str);
+	make_remote_repmgr_path(&remote_command_str);
+
+	appendPQExpBuffer(&remote_command_str,
+					  "%s--upstream-conninfo=\\'%s\\' node rejoin",
+					  node_rejoin_options.data,
+					  local_node_record.conninfo);
+
+	log_debug("executing:\n  \"%s\"", remote_command_str.data);
+	(void)remote_command(
+		remote_host,
+		runtime_options.remote_user,
+		remote_command_str.data,
+		NULL);
+
+	termPQExpBuffer(&remote_command_str);
+	termPQExpBuffer(&node_rejoin_options);
+
+
 
 	/* TODO: verify this node's record was updated correctly */
 
diff --git a/repmgr-action-standby.h b/repmgr-action-standby.h
index cb6534ca..4d06ecad 100644
--- a/repmgr-action-standby.h
+++ b/repmgr-action-standby.h
@@ -11,6 +11,7 @@ extern void do_standby_register(void);
 extern void do_standby_unregister(void);
 extern void do_standby_promote(void);
 extern void do_standby_follow(void);
+extern void do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record);
 extern void do_standby_switchover(void);
 
 
diff --git a/repmgr-client.c b/repmgr-client.c
index 9fa54e05..5f42fe7e 100644
--- a/repmgr-client.c
+++ b/repmgr-client.c
@@ -1189,6 +1189,14 @@ check_cli_parameters(const int action)
 		}
 		break;
 
+		case NODE_REJOIN:
+			if (runtime_options.upstream_conninfo[0] == '\0')
+			{
+				item_list_append(
+					&cli_errors,
+					"--upstream-conninfo must be provided with NODE REJOIN");
+			}
+			break;
 		case CLUSTER_SHOW:
 		case CLUSTER_MATRIX:
 		case CLUSTER_CROSSCHECK:
@@ -1416,6 +1424,7 @@ check_cli_parameters(const int action)
 		switch (action)
 		{
 			case STANDBY_SWITCHOVER:
+			case NODE_REJOIN:
 				break;
 			default:
 				item_list_append_format(