node rejoin: handle unclean shutdown in Pg13

From PostgreSQL 13, pg_rewind will automatically handle an unclean
shutdown itself, so as long as --force-rewind was provided, there
is no need to fail with an error.

Note that pg_rewind handles the unclean shutdown by starting PostgreSQL
in single user mode, which it does before performing any checks as
to whether a rewind is actually necessary.

However pg_rewind doesn't take into account the possible presence
of a standby.signal file, so we remove that and recreate it after
pg_rewind was executed.
This commit is contained in:
Ian Barwick
2020-10-13 10:18:29 +09:00
parent d62743ddf4
commit 5f986bc981
7 changed files with 163 additions and 58 deletions

View File

@@ -12,6 +12,7 @@
provided to "node check" (Ian)
repmgr: improve "node rejoin" checks (Ian)
repmgr: enable "node rejoin" to join a target with a lower timeline (Ian)
repmgr: support pg_rewind's automatic crash recovery in Pg13 and later (Ian)
repmgr: improve output formatting for cluster matrix/crosscheck (Ian)
repmgr: improve database connection failure error checking on the
demotion candidate during "standby switchover" (Ian)

View File

@@ -115,6 +115,15 @@
</para>
</listitem>
<listitem>
<para>
<link linkend="repmgr-node-rejoin">repmgr node rejoin</link>:
in PostgreSQL 13 and later, support <application>pg_rewind</application>'s
ability to automatically run crash recovery on a PostgreSQL instance
which was not shut down cleanly.
</para>
</listitem>
<listitem>
<para>
<link linkend="repmgr-node-check">repmgr node check</link>:

View File

@@ -212,9 +212,18 @@
a standby to the current primary, not another standby.
</para>
<para>
The node must have been shut down cleanly; if this was not the case, it will
need to be manually started (remove any existing <filename>recovery.conf</filename> file first)
until it has reached a consistent recovery point, then shut down cleanly.
The node's PostgreSQL instance must have been shut down cleanly. If this was not the
case, it will need to be started up until it has reached a consistent recovery point,
then shut down cleanly.
</para>
<para>
In PostgreSQL 13 and later, this will be done automatically
if the <option>--force-rewind</option> option is provided (even if an actual rewind
is not necessary).
</para>
<para>
With PostgreSQL 12 and earlier, PostgreSQL will need to
be started and shut down manually; see below for the best way to do this.
</para>
<tip>
<para>
@@ -226,11 +235,14 @@
rm -f /var/lib/pgsql/data/recovery.conf
postgres --single -D /var/lib/pgsql/data/ &lt; /dev/null</programlisting>
</para>
<para>
Note that <filename>standby.signal</filename> (PostgreSQL 11 and earlier:
<filename>recovery.conf</filename>) <emphasis>must</emphasis> be removed
from the data directory for PostgreSQL to be able to start in single
user mode.
</para>
</tip>
<para>
&repmgr; will attempt to verify whether the node can rejoin as-is, or whether
<command>pg_rewind</command> must be used (see following section).
</para>
</refsect1>
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">

View File

@@ -2494,6 +2494,8 @@ do_node_rejoin(void)
DBState db_state;
PGPing status;
bool is_shutdown = true;
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
/* only set once we know pg_rewind must run crash recovery (Pg13+, unclean shutdown) */
bool hide_standby_signal = false;
PQExpBufferData command;
PQExpBufferData command_output;
@@ -2538,6 +2540,21 @@ do_node_rejoin(void)
exit(ERR_REJOIN_FAIL);
}
/*
* Server version number required to determine whether pg_rewind will run
* crash recovery (Pg 13 and later).
*/
server_version_num = get_pg_version(config_file_options.data_directory, NULL);
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
{
/* This is very unlikely to happen */
log_error(_("unable to determine database version"));
exit(ERR_BAD_CONFIG);
}
log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num);
/* check if cleanly shut down */
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
{
@@ -2545,15 +2562,41 @@ do_node_rejoin(void)
{
log_error(_("database is still shutting down"));
}
else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true)
{
log_warning(_("database is not shut down cleanly"));
log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery"));
/*
* If pg_rewind is executed, the first change it will make
* is to start the server in single user mode, which will fail
* in the presence of "standby.signal", so we'll "hide" it
* (actually delete and recreate).
*/
hide_standby_signal = true;
}
else
{
/*
* If the database was not shut down cleanly, it *might* rejoin correctly
* after starting up and recovering, but better to ensure the database
* can recover before trying anything else.
*/
log_error(_("database is not shut down cleanly"));
if (runtime_options.force_rewind_used == true)
if (server_version_num >= 130000)
{
log_detail(_("pg_rewind will not be able to run"));
log_hint(_("provide --force-rewind to run recovery"));
}
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
else
{
if (runtime_options.force_rewind_used == true)
{
log_detail(_("pg_rewind will not be able to run"));
}
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
}
exit(ERR_REJOIN_FAIL);
}
}
@@ -2757,6 +2800,30 @@ do_node_rejoin(void)
log_detail(_("pg_rewind command is \"%s\""),
command.data);
/*
* In Pg13 and later, pg_rewind will attempt to start up a server which
* was not cleanly shut down in single user mode. This will fail if
* "standby.signal" is present. We'll remove it and restore it after
* pg_rewind runs.
*/
if (hide_standby_signal == true)
{
char standby_signal_file_path[MAXPGPATH] = "";
log_notice(_("temporarily removing \"standby.signal\""));
log_detail(_("this is required so pg_rewind can fix the unclean shutdown"));
make_standby_signal_path(standby_signal_file_path);
if (unlink(standby_signal_file_path) < 0 && errno != ENOENT)
{
log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""),
standby_signal_file_path);
log_detail("%s", strerror(errno));
exit(ERR_REJOIN_FAIL);
}
}
initPQExpBuffer(&command_output);
ret = local_command(command.data,
@@ -2764,6 +2831,16 @@ do_node_rejoin(void)
termPQExpBuffer(&command);
if (hide_standby_signal == true)
{
/*
* Restore standby.signal if we previously removed it, regardless
* of whether the pg_rewind operation failed.
*/
log_notice(_("recreating \"standby.signal\""));
write_standby_signal();
}
if (ret == false)
{
log_error(_("unable to execute pg_rewind"));

View File

@@ -121,7 +121,6 @@ static char *make_barman_ssh_command(char *buf);
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
static bool write_standby_signal(void);
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
@@ -7998,53 +7997,6 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_co
}
/*
 * Create "standby.signal" in the configured data directory (PostgreSQL 12
 * and later).
 *
 * The file contains a single comment line; a newly created file is
 * restricted to owner read/write.
 *
 * Returns true if the file was written and closed successfully; otherwise
 * logs an error and returns false.
 */
static bool
write_standby_signal(void)
{
	char		standby_signal_file_path[MAXPGPATH] = "";
	FILE	   *file;
	mode_t		um;

	snprintf(standby_signal_file_path, MAXPGPATH,
			 "%s/%s",
			 config_file_options.data_directory,
			 STANDBY_SIGNAL_FILE);

	/* Mask out all group/other permission bits so the file is created 0600 */
	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
	file = fopen(standby_signal_file_path, "w");
	umask(um);

	if (file == NULL)
	{
		log_error(_("unable to create %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(errno));

		return false;
	}

	if (fputs("# created by repmgr\n", file) == EOF)
	{
		/* fclose() may overwrite errno, so keep the reason for the failure */
		int			write_errno = errno;

		fclose(file);

		log_error(_("unable to write to %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(write_errno));

		return false;
	}

	/* The stream is buffered, so a write error may only surface on close */
	if (fclose(file) != 0)
	{
		log_error(_("unable to write to %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(errno));

		return false;
	}

	return true;
}
static void
write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
{

View File

@@ -282,6 +282,8 @@ extern void get_node_config_directory(char *config_dir_buf);
extern void get_node_data_directory(char *data_dir_buf);
extern void init_node_record(t_node_info *node_record);
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
extern void make_standby_signal_path(char *buf);
extern bool write_standby_signal(void);
extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg);
extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);

View File

@@ -3640,6 +3640,58 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
}
/*
 * Write the path of the "standby.signal" file, located directly inside the
 * configured data directory, into "buf".
 *
 * At most MAXPGPATH bytes (including the terminator) are written, so the
 * caller must supply a buffer of at least that size.
 */
void
make_standby_signal_path(char *buf)
{
	const char *data_dir = config_file_options.data_directory;

	snprintf(buf, MAXPGPATH, "%s/%s", data_dir, STANDBY_SIGNAL_FILE);
}
/*
 * Create "standby.signal" in the configured data directory (PostgreSQL 12
 * and later).
 *
 * The file contains a single comment line; a newly created file is
 * restricted to owner read/write.
 *
 * Returns true if the file was written and closed successfully; otherwise
 * logs an error and returns false.
 */
bool
write_standby_signal(void)
{
	char		standby_signal_file_path[MAXPGPATH] = "";
	FILE	   *file;
	mode_t		um;

	make_standby_signal_path(standby_signal_file_path);

	/* Mask out all group/other permission bits so the file is created 0600 */
	um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
	file = fopen(standby_signal_file_path, "w");
	umask(um);

	if (file == NULL)
	{
		log_error(_("unable to create %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(errno));

		return false;
	}

	if (fputs("# created by repmgr\n", file) == EOF)
	{
		/* fclose() may overwrite errno, so keep the reason for the failure */
		int			write_errno = errno;

		fclose(file);

		log_error(_("unable to write to %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(write_errno));

		return false;
	}

	/* The stream is buffered, so a write error may only surface on close */
	if (fclose(file) != 0)
	{
		log_error(_("unable to write to %s file at \"%s\""),
				  STANDBY_SIGNAL_FILE,
				  standby_signal_file_path);
		log_detail("%s", strerror(errno));

		return false;
	}

	return true;
}
/*
* NOTE:
* - the provided connection should be for the normal repmgr user