Various fixes to "repmgr node rejoin"

2026-05-31 19:39:04 +00:00 · 2017-09-01 11:30:31 +09:00
parent f9f05158d2
commit edb74ccef9
3 changed files with 44 additions and 14 deletions
@@ -9,8 +9,8 @@ operations.

 `repmgr 4` is a complete rewrite of the existing `repmgr` codebase.

-Supports PostgreSQL 9.5 and later; support for PostgreSQL 9.3 and 9.4 has been
-dropped. Please continue to use repmgrd 3.x for those versions.
+Supports PostgreSQL 9.4 and later; support for PostgreSQL 9.3 has been
+dropped (please continue to use repmgr 3.3 for PostgreSQL 9.3).

 ### BDR support

@@ -1574,6 +1574,7 @@ The following commands are available:

    repmgr node status
    repmgr node check
+    repmgr node rejoin

    repmgr cluster show
    repmgr cluster matrix
@@ -1690,6 +1691,14 @@ The following commands are available:
    Individual checks can also be output in a Nagios-compatible format with
    the option `--nagios`.

+* `node rejoin`
+
+    Enables a dormant (stopped) node to be rejoined to the replication cluster.
+
+    This can optionally use `pg_rewind` to re-integrate a node which has diverged
+    from the rest of the cluster, typically a failed primary.
+
+    XXX add details

 * `cluster show`

@@ -1667,15 +1667,20 @@ do_node_rejoin(void)
 			command.data,
 			&command_output);

-		termPQExpBuffer(&command_output);
 		termPQExpBuffer(&command);

-		if (ret != 0)
+		if (ret == false)
 		{
 			log_error(_("unable to execute pg_rewind"));
-			log_detail(_("see preceding output for details"));
+			log_detail("%s", command_output.data);
+
+			termPQExpBuffer(&command_output);
+
 			exit(ERR_BAD_CONFIG);
 		}
+
+		termPQExpBuffer(&command_output);
+
 		/* Restore any previously archived config files */
 		_do_node_restore_config();

@@ -1657,7 +1657,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
 		bool		server_up = is_server_available(config_file_options.conninfo);
 		char	   *action = NULL;

-		log_debug("XXX %s\n", config_file_options.conninfo);
 		if (server_up == true)
 		{
 			action = "restart";
@@ -2551,25 +2550,42 @@ do_standby_switchover(void)
 					  local_node_record.conninfo);

 	log_debug("executing:\n  \"%s\"", remote_command_str.data);
-	(void)remote_command(
+	initPQExpBuffer(&command_output);
+
+	r = remote_command(
 		remote_host,
 		runtime_options.remote_user,
 		remote_command_str.data,
-		NULL);
+		&command_output);

 	termPQExpBuffer(&remote_command_str);
 	termPQExpBuffer(&node_rejoin_options);

 	/* TODO: verify this node's record was updated correctly */

-	create_event_record(local_conn,
-						&config_file_options,
-						config_file_options.node_id,
-						"standby_switchover",
-						true,
-						NULL);
+	if (r == false)
+	{
+		log_error(_("rejoin failed %i"), r);
+		log_detail("%s", command_output.data);

+		create_event_record(local_conn,
+							&config_file_options,
+							config_file_options.node_id,
+							"standby_switchover",
+							false,
+							command_output.data);
+	}
+	else
+	{
+		create_event_record(local_conn,
+							&config_file_options,
+							config_file_options.node_id,
+							"standby_switchover",
+							true,
+							NULL);
+	}

+	termPQExpBuffer(&command_output);

 	/* clean up remote node */
 	remote_conn = establish_db_connection(remote_node_record.conninfo, false);