node rejoin: handle unclean shutdown in Pg13

From PostgreSQL 13, pg_rewind will automatically handle an unclean shutdown itself, so as long as --force-rewind was provided, so there is no need to fail with an error. Note that pg_rewind handles the unclean shutdown by starting PostgreSQL in single user mode, which it does before performing any checks as to whether a rewind is actually necessary. However pg_rewind doesn't take into account the possible presence of a standby.signal file, so we remove that and recreate it after pg_rewind was executed.
2026-07-16 14:29:05 +00:00 · 2020-10-13 10:18:29 +09:00
parent d62743ddf4
commit 5f986bc981
7 changed files with 163 additions and 58 deletions
@@ -12,6 +12,7 @@
          provided to "node check" (Ian)
        repmgr: improve "node rejoin" checks (Ian)
        repmgr: enable "node rejoin" to join a target with a lower timeline (Ian)
+        repmgr: support pg_rewind's automatic crash recovery in Pg13 and later (Ian)
        repmgr: improve output formatting for cluster matrix/crosscheck (Ian)
        repmgr: improve database connection failure error checking on the
          demotion candidate during "standby switchover" (Ian)
@@ -115,6 +115,15 @@
            </para>
          </listitem>

+          <listitem>
+            <para>
+              <link linkend="repmgr-node-rejoin">repmgr node rejoin</link>:
+              in PostgreSQL 13 and later, support <application>pg_rewind</application>'s
+              ability to automatically run crash recovery on a PostgreSQL instance
+              which was not shut down cleanly.
+            </para>
+          </listitem>
+
          <listitem>
            <para>
              <link linkend="repmgr-node-check">repmgr node check</link>:
@@ -212,9 +212,18 @@
      a standby to the current primary, not another standby.
    </para>
    <para>
-      The node must have been shut down cleanly; if this was not the case, it will
-      need to be manually started (remove any existing <filename>recovery.conf</filename> file first)
-      until it has reached a consistent recovery point, then shut down cleanly.
+      The node's PostgreSQL instance  must have been shut down cleanly. If this was not the
+      case, it will need to be started up until it has reached a consistent recovery point,
+      then shut down cleanly.
+    </para>
+    <para>
+      In PostgreSQL 13 and later, this will be done automatically
+      if the <option>--force-rewind</option> is provided (even if an actual rewind
+      is not necessary).
+    </para>
+    <para>
+      With PostgreSQL 12 and earlier, PostgreSQL will need to
+      be started and shut down manually; see below for the best way to do this.
    </para>
    <tip>
      <para>
@@ -226,11 +235,14 @@
          rm -f /var/lib/pgsql/data/recovery.conf
          postgres --single -D /var/lib/pgsql/data/ &lt; /dev/null</programlisting>
      </para>
+      <para>
+        Note that  <filename>standby.signal</filename> (PostgreSQL 11 and earlier:
+        <filename>recovery.conf</filename>) <emphasis>must</emphasis> be removed
+        from the data directory for PostgreSQL to be able to start in single
+        user mode.
+      </para>
    </tip>
-    <para>
-      &repmgr; will attempt to verify whether the node can rejoin as-is, or whether
-      <command>pg_rewind</command> must be used (see following section).
-    </para>
+
  </refsect1>

  <refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
@@ -2494,6 +2494,8 @@ do_node_rejoin(void)
 	DBState		db_state;
 	PGPing		status;
 	bool		is_shutdown = true;
+	int			server_version_num = UNKNOWN_SERVER_VERSION_NUM;
+	bool		hide_standby_signal = true;

 	PQExpBufferData command;
 	PQExpBufferData command_output;
@@ -2538,6 +2540,21 @@ do_node_rejoin(void)
 		exit(ERR_REJOIN_FAIL);
 	}

+	/*
+	 * Server version number required to determine whether pg_rewind will run
+	 * crash recovery (Pg 13 and later).
+	 */
+	server_version_num = get_pg_version(config_file_options.data_directory, NULL);
+
+	if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
+	{
+		/* This is very unlikely to happen */
+		log_error(_("unable to determine database version"));
+		exit(ERR_BAD_CONFIG);
+	}
+
+	log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num);
+
 	/* check if cleanly shut down */
 	if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
 	{
@@ -2545,15 +2562,41 @@ do_node_rejoin(void)
 		{
 			log_error(_("database is still shutting down"));
 		}
+		else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true)
+		{
+			log_warning(_("database is not shut down cleanly"));
+			log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery"));
+
+			/*
+			 * If pg_rewind is executed, the first change it will make
+			 * is to start the server in single user mode, which will fail
+			 * in the presence of "standby.signal", so we'll "hide" it
+			 * (actually delete and recreate).
+			 */
+			hide_standby_signal = true;
+		}
 		else
 		{
+			/*
+			 * If the database was not shut down cleanly, it *might* rejoin correctly
+			 * after starting up and recovering, but better to ensure the database
+			 * can recover before trying anything else.
+			 */
 			log_error(_("database is not shut down cleanly"));

-			if (runtime_options.force_rewind_used == true)
+			if (server_version_num >= 130000)
 			{
-				log_detail(_("pg_rewind will not be able to run"));
+				log_hint(_("provide --force-rewind to run recovery"));
 			}
-			log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
+			else
+			{
+				if (runtime_options.force_rewind_used == true)
+				{
+					log_detail(_("pg_rewind will not be able to run"));
+				}
+				log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
+			}
+
 			exit(ERR_REJOIN_FAIL);
 		}
 	}
@@ -2757,6 +2800,30 @@ do_node_rejoin(void)
 			log_detail(_("pg_rewind command is \"%s\""),
 					   command.data);

+			/*
+			 * In Pg13 and later, pg_rewind will attempt to start up a server which
+			 * was not cleanly shut down in single user mode. This will fail if
+			 * "standby.signal" is present. We'll remove it and restore it after
+			 * pg_rewind runs.
+			 */
+			if (hide_standby_signal == true)
+			{
+				char	    standby_signal_file_path[MAXPGPATH] = "";
+
+				log_notice(_("temporarily removing \"standby.signal\""));
+				log_detail(_("this is required so pg_rewind can fix the unclean shutdown"));
+
+				make_standby_signal_path(standby_signal_file_path);
+
+				if (unlink(standby_signal_file_path) < 0 && errno != ENOENT)
+				{
+					log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""),
+							  standby_signal_file_path);
+					log_detail("%s", strerror(errno));
+					exit(ERR_REJOIN_FAIL);
+				}
+			}
+
 			initPQExpBuffer(&command_output);

 			ret = local_command(command.data,
@@ -2764,6 +2831,16 @@ do_node_rejoin(void)

 			termPQExpBuffer(&command);

+			if (hide_standby_signal == true)
+			{
+				/*
+				 * Restore standby.signal if we previously removed it, regardless
+				 * of whether the pg_rewind operation failed.
+				 */
+				log_notice(_("recreating \"standby.signal\""));
+				write_standby_signal();
+			}
+
 			if (ret == false)
 			{
 				log_error(_("unable to execute pg_rewind"));
@@ -121,7 +121,6 @@ static char *make_barman_ssh_command(char *buf);

 static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
 static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
-static bool write_standby_signal(void);

 static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
 static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
@@ -7998,53 +7997,6 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_co
 }


-/*
- * create standby.signal (PostgreSQL 12 and later)
- */
-
-static bool
-write_standby_signal(void)
-{
-	char	    standby_signal_file_path[MAXPGPATH] = "";
-	FILE	   *file;
-	mode_t		um;
-
-	snprintf(standby_signal_file_path, MAXPGPATH,
-			 "%s/%s",
-			 config_file_options.data_directory,
-			 STANDBY_SIGNAL_FILE);
-
-	/* Set umask to 0600 */
-	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
-	file = fopen(standby_signal_file_path, "w");
-	umask(um);
-
-	if (file == NULL)
-	{
-		log_error(_("unable to create %s file at \"%s\""),
-				  STANDBY_SIGNAL_FILE,
-				  standby_signal_file_path);
-		log_detail("%s", strerror(errno));
-
-		return false;
-	}
-
-	if (fputs("# created by repmgr\n", file) == EOF)
-	{
-		log_error(_("unable to write to %s file at \"%s\""),
-				  STANDBY_SIGNAL_FILE,
-				  standby_signal_file_path);
-		fclose(file);
-
-		return false;
-	}
-
-	fclose(file);
-
-	return true;
-}
-
-
 static void
 write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
 {
@@ -282,6 +282,8 @@ extern void get_node_config_directory(char *config_dir_buf);
 extern void get_node_data_directory(char *data_dir_buf);
 extern void init_node_record(t_node_info *node_record);
 extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
+extern void make_standby_signal_path(char *buf);
+extern bool write_standby_signal(void);

 extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg);
 extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
@@ -3640,6 +3640,58 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
 }


+void
+make_standby_signal_path(char *buf)
+{
+	snprintf(buf, MAXPGPATH,
+			 "%s/%s",
+			 config_file_options.data_directory,
+			 STANDBY_SIGNAL_FILE);
+}
+
+/*
+ * create standby.signal (PostgreSQL 12 and later)
+ */
+bool
+write_standby_signal(void)
+{
+	char	    standby_signal_file_path[MAXPGPATH] = "";
+	FILE	   *file;
+	mode_t		um;
+
+	make_standby_signal_path(standby_signal_file_path);
+
+	/* Set umask to 0600 */
+	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
+	file = fopen(standby_signal_file_path, "w");
+	umask(um);
+
+	if (file == NULL)
+	{
+		log_error(_("unable to create %s file at \"%s\""),
+				  STANDBY_SIGNAL_FILE,
+				  standby_signal_file_path);
+		log_detail("%s", strerror(errno));
+
+		return false;
+	}
+
+	if (fputs("# created by repmgr\n", file) == EOF)
+	{
+		log_error(_("unable to write to %s file at \"%s\""),
+				  STANDBY_SIGNAL_FILE,
+				  standby_signal_file_path);
+		fclose(file);
+
+		return false;
+	}
+
+	fclose(file);
+
+	return true;
+}
+
+
 /*
 * NOTE:
 *  - the provided connection should be for the normal repmgr user