mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
node rejoin: handle unclean shutdown in Pg13
From PostgreSQL 13, pg_rewind will automatically handle an unclean shutdown itself, so as long as --force-rewind was provided, so there is no need to fail with an error. Note that pg_rewind handles the unclean shutdown by starting PostgreSQL in single user mode, which it does before performing any checks as to whether a rewind is actually necessary. However pg_rewind doesn't take into account the possible presence of a standby.signal file, so we remove that and recreate it after pg_rewind was executed.
This commit is contained in:
1
HISTORY
1
HISTORY
@@ -12,6 +12,7 @@
|
|||||||
provided to "node check" (Ian)
|
provided to "node check" (Ian)
|
||||||
repmgr: improve "node rejoin" checks (Ian)
|
repmgr: improve "node rejoin" checks (Ian)
|
||||||
repmgr: enable "node rejoin" to join a target with a lower timeline (Ian)
|
repmgr: enable "node rejoin" to join a target with a lower timeline (Ian)
|
||||||
|
repmgr: support pg_rewind's automatic crash recovery in Pg13 and later (Ian)
|
||||||
repmgr: improve output formatting for cluster matrix/crosscheck (Ian)
|
repmgr: improve output formatting for cluster matrix/crosscheck (Ian)
|
||||||
repmgr: improve database connection failure error checking on the
|
repmgr: improve database connection failure error checking on the
|
||||||
demotion candidate during "standby switchover" (Ian)
|
demotion candidate during "standby switchover" (Ian)
|
||||||
|
|||||||
@@ -115,6 +115,15 @@
|
|||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-rejoin">repmgr node rejoin</link>:
|
||||||
|
in PostgreSQL 13 and later, support <application>pg_rewind</application>'s
|
||||||
|
ability to automatically run crash recovery on a PostgreSQL instance
|
||||||
|
which was not shut down cleanly.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<link linkend="repmgr-node-check">repmgr node check</link>:
|
<link linkend="repmgr-node-check">repmgr node check</link>:
|
||||||
|
|||||||
@@ -212,9 +212,18 @@
|
|||||||
a standby to the current primary, not another standby.
|
a standby to the current primary, not another standby.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The node must have been shut down cleanly; if this was not the case, it will
|
The node's PostgreSQL instance must have been shut down cleanly. If this was not the
|
||||||
need to be manually started (remove any existing <filename>recovery.conf</filename> file first)
|
case, it will need to be started up until it has reached a consistent recovery point,
|
||||||
until it has reached a consistent recovery point, then shut down cleanly.
|
then shut down cleanly.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In PostgreSQL 13 and later, this will be done automatically
|
||||||
|
if the <option>--force-rewind</option> is provided (even if an actual rewind
|
||||||
|
is not necessary).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
With PostgreSQL 12 and earlier, PostgreSQL will need to
|
||||||
|
be started and shut down manually; see below for the best way to do this.
|
||||||
</para>
|
</para>
|
||||||
<tip>
|
<tip>
|
||||||
<para>
|
<para>
|
||||||
@@ -226,11 +235,14 @@
|
|||||||
rm -f /var/lib/pgsql/data/recovery.conf
|
rm -f /var/lib/pgsql/data/recovery.conf
|
||||||
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
postgres --single -D /var/lib/pgsql/data/ < /dev/null</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that <filename>standby.signal</filename> (PostgreSQL 11 and earlier:
|
||||||
|
<filename>recovery.conf</filename>) <emphasis>must</emphasis> be removed
|
||||||
|
from the data directory for PostgreSQL to be able to start in single
|
||||||
|
user mode.
|
||||||
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
<para>
|
|
||||||
&repmgr; will attempt to verify whether the node can rejoin as-is, or whether
|
|
||||||
<command>pg_rewind</command> must be used (see following section).
|
|
||||||
</para>
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
<refsect1 id="repmgr-node-rejoin-pg-rewind" xreflabel="Using pg_rewind">
|
||||||
|
|||||||
@@ -2494,6 +2494,8 @@ do_node_rejoin(void)
|
|||||||
DBState db_state;
|
DBState db_state;
|
||||||
PGPing status;
|
PGPing status;
|
||||||
bool is_shutdown = true;
|
bool is_shutdown = true;
|
||||||
|
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||||
|
bool hide_standby_signal = true;
|
||||||
|
|
||||||
PQExpBufferData command;
|
PQExpBufferData command;
|
||||||
PQExpBufferData command_output;
|
PQExpBufferData command_output;
|
||||||
@@ -2538,6 +2540,21 @@ do_node_rejoin(void)
|
|||||||
exit(ERR_REJOIN_FAIL);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Server version number required to determine whether pg_rewind will run
|
||||||
|
* crash recovery (Pg 13 and later).
|
||||||
|
*/
|
||||||
|
server_version_num = get_pg_version(config_file_options.data_directory, NULL);
|
||||||
|
|
||||||
|
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||||
|
{
|
||||||
|
/* This is very unlikely to happen */
|
||||||
|
log_error(_("unable to determine database version"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num);
|
||||||
|
|
||||||
/* check if cleanly shut down */
|
/* check if cleanly shut down */
|
||||||
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
||||||
{
|
{
|
||||||
@@ -2545,15 +2562,41 @@ do_node_rejoin(void)
|
|||||||
{
|
{
|
||||||
log_error(_("database is still shutting down"));
|
log_error(_("database is still shutting down"));
|
||||||
}
|
}
|
||||||
|
else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true)
|
||||||
|
{
|
||||||
|
log_warning(_("database is not shut down cleanly"));
|
||||||
|
log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If pg_rewind is executed, the first change it will make
|
||||||
|
* is to start the server in single user mode, which will fail
|
||||||
|
* in the presence of "standby.signal", so we'll "hide" it
|
||||||
|
* (actually delete and recreate).
|
||||||
|
*/
|
||||||
|
hide_standby_signal = true;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* If the database was not shut down cleanly, it *might* rejoin correctly
|
||||||
|
* after starting up and recovering, but better to ensure the database
|
||||||
|
* can recover before trying anything else.
|
||||||
|
*/
|
||||||
log_error(_("database is not shut down cleanly"));
|
log_error(_("database is not shut down cleanly"));
|
||||||
|
|
||||||
if (runtime_options.force_rewind_used == true)
|
if (server_version_num >= 130000)
|
||||||
{
|
{
|
||||||
log_detail(_("pg_rewind will not be able to run"));
|
log_hint(_("provide --force-rewind to run recovery"));
|
||||||
}
|
}
|
||||||
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.force_rewind_used == true)
|
||||||
|
{
|
||||||
|
log_detail(_("pg_rewind will not be able to run"));
|
||||||
|
}
|
||||||
|
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
||||||
|
}
|
||||||
|
|
||||||
exit(ERR_REJOIN_FAIL);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2757,6 +2800,30 @@ do_node_rejoin(void)
|
|||||||
log_detail(_("pg_rewind command is \"%s\""),
|
log_detail(_("pg_rewind command is \"%s\""),
|
||||||
command.data);
|
command.data);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In Pg13 and later, pg_rewind will attempt to start up a server which
|
||||||
|
* was not cleanly shut down in single user mode. This will fail if
|
||||||
|
* "standby.signal" is present. We'll remove it and restore it after
|
||||||
|
* pg_rewind runs.
|
||||||
|
*/
|
||||||
|
if (hide_standby_signal == true)
|
||||||
|
{
|
||||||
|
char standby_signal_file_path[MAXPGPATH] = "";
|
||||||
|
|
||||||
|
log_notice(_("temporarily removing \"standby.signal\""));
|
||||||
|
log_detail(_("this is required so pg_rewind can fix the unclean shutdown"));
|
||||||
|
|
||||||
|
make_standby_signal_path(standby_signal_file_path);
|
||||||
|
|
||||||
|
if (unlink(standby_signal_file_path) < 0 && errno != ENOENT)
|
||||||
|
{
|
||||||
|
log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""),
|
||||||
|
standby_signal_file_path);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
exit(ERR_REJOIN_FAIL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&command_output);
|
initPQExpBuffer(&command_output);
|
||||||
|
|
||||||
ret = local_command(command.data,
|
ret = local_command(command.data,
|
||||||
@@ -2764,6 +2831,16 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
termPQExpBuffer(&command);
|
termPQExpBuffer(&command);
|
||||||
|
|
||||||
|
if (hide_standby_signal == true)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Restore standby.signal if we previously removed it, regardless
|
||||||
|
* of whether the pg_rewind operation failed.
|
||||||
|
*/
|
||||||
|
log_notice(_("recreating \"standby.signal\""));
|
||||||
|
write_standby_signal();
|
||||||
|
}
|
||||||
|
|
||||||
if (ret == false)
|
if (ret == false)
|
||||||
{
|
{
|
||||||
log_error(_("unable to execute pg_rewind"));
|
log_error(_("unable to execute pg_rewind"));
|
||||||
|
|||||||
@@ -121,7 +121,6 @@ static char *make_barman_ssh_command(char *buf);
|
|||||||
|
|
||||||
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
|
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file);
|
||||||
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
|
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
|
||||||
static bool write_standby_signal(void);
|
|
||||||
|
|
||||||
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
||||||
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
|
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success);
|
||||||
@@ -7998,53 +7997,6 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_co
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* create standby.signal (PostgreSQL 12 and later)
|
|
||||||
*/
|
|
||||||
|
|
||||||
static bool
|
|
||||||
write_standby_signal(void)
|
|
||||||
{
|
|
||||||
char standby_signal_file_path[MAXPGPATH] = "";
|
|
||||||
FILE *file;
|
|
||||||
mode_t um;
|
|
||||||
|
|
||||||
snprintf(standby_signal_file_path, MAXPGPATH,
|
|
||||||
"%s/%s",
|
|
||||||
config_file_options.data_directory,
|
|
||||||
STANDBY_SIGNAL_FILE);
|
|
||||||
|
|
||||||
/* Set umask to 0600 */
|
|
||||||
um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
|
|
||||||
file = fopen(standby_signal_file_path, "w");
|
|
||||||
umask(um);
|
|
||||||
|
|
||||||
if (file == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to create %s file at \"%s\""),
|
|
||||||
STANDBY_SIGNAL_FILE,
|
|
||||||
standby_signal_file_path);
|
|
||||||
log_detail("%s", strerror(errno));
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fputs("# created by repmgr\n", file) == EOF)
|
|
||||||
{
|
|
||||||
log_error(_("unable to write to %s file at \"%s\""),
|
|
||||||
STANDBY_SIGNAL_FILE,
|
|
||||||
standby_signal_file_path);
|
|
||||||
fclose(file);
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(file);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
|
write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -282,6 +282,8 @@ extern void get_node_config_directory(char *config_dir_buf);
|
|||||||
extern void get_node_data_directory(char *data_dir_buf);
|
extern void get_node_data_directory(char *data_dir_buf);
|
||||||
extern void init_node_record(t_node_info *node_record);
|
extern void init_node_record(t_node_info *node_record);
|
||||||
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||||
|
extern void make_standby_signal_path(char *buf);
|
||||||
|
extern bool write_standby_signal(void);
|
||||||
|
|
||||||
extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg);
|
extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg);
|
||||||
extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||||
|
|||||||
@@ -3640,6 +3640,58 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
make_standby_signal_path(char *buf)
|
||||||
|
{
|
||||||
|
snprintf(buf, MAXPGPATH,
|
||||||
|
"%s/%s",
|
||||||
|
config_file_options.data_directory,
|
||||||
|
STANDBY_SIGNAL_FILE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* create standby.signal (PostgreSQL 12 and later)
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
write_standby_signal(void)
|
||||||
|
{
|
||||||
|
char standby_signal_file_path[MAXPGPATH] = "";
|
||||||
|
FILE *file;
|
||||||
|
mode_t um;
|
||||||
|
|
||||||
|
make_standby_signal_path(standby_signal_file_path);
|
||||||
|
|
||||||
|
/* Set umask to 0600 */
|
||||||
|
um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
|
||||||
|
file = fopen(standby_signal_file_path, "w");
|
||||||
|
umask(um);
|
||||||
|
|
||||||
|
if (file == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create %s file at \"%s\""),
|
||||||
|
STANDBY_SIGNAL_FILE,
|
||||||
|
standby_signal_file_path);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fputs("# created by repmgr\n", file) == EOF)
|
||||||
|
{
|
||||||
|
log_error(_("unable to write to %s file at \"%s\""),
|
||||||
|
STANDBY_SIGNAL_FILE,
|
||||||
|
standby_signal_file_path);
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NOTE:
|
* NOTE:
|
||||||
* - the provided connection should be for the normal repmgr user
|
* - the provided connection should be for the normal repmgr user
|
||||||
|
|||||||
Reference in New Issue
Block a user