mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 08:06:29 +00:00
node rejoin: handle unclean shutdown in Pg13
From PostgreSQL 13, pg_rewind will automatically handle an unclean shutdown itself, so as long as --force-rewind was provided, so there is no need to fail with an error. Note that pg_rewind handles the unclean shutdown by starting PostgreSQL in single user mode, which it does before performing any checks as to whether a rewind is actually necessary. However pg_rewind doesn't take into account the possible presence of a standby.signal file, so we remove that and recreate it after pg_rewind was executed.
This commit is contained in:
@@ -2494,6 +2494,8 @@ do_node_rejoin(void)
|
||||
DBState db_state;
|
||||
PGPing status;
|
||||
bool is_shutdown = true;
|
||||
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||
bool hide_standby_signal = true;
|
||||
|
||||
PQExpBufferData command;
|
||||
PQExpBufferData command_output;
|
||||
@@ -2538,6 +2540,21 @@ do_node_rejoin(void)
|
||||
exit(ERR_REJOIN_FAIL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Server version number required to determine whether pg_rewind will run
|
||||
* crash recovery (Pg 13 and later).
|
||||
*/
|
||||
server_version_num = get_pg_version(config_file_options.data_directory, NULL);
|
||||
|
||||
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||
{
|
||||
/* This is very unlikely to happen */
|
||||
log_error(_("unable to determine database version"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num);
|
||||
|
||||
/* check if cleanly shut down */
|
||||
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
||||
{
|
||||
@@ -2545,15 +2562,41 @@ do_node_rejoin(void)
|
||||
{
|
||||
log_error(_("database is still shutting down"));
|
||||
}
|
||||
else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true)
|
||||
{
|
||||
log_warning(_("database is not shut down cleanly"));
|
||||
log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery"));
|
||||
|
||||
/*
|
||||
* If pg_rewind is executed, the first change it will make
|
||||
* is to start the server in single user mode, which will fail
|
||||
* in the presence of "standby.signal", so we'll "hide" it
|
||||
* (actually delete and recreate).
|
||||
*/
|
||||
hide_standby_signal = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* If the database was not shut down cleanly, it *might* rejoin correctly
|
||||
* after starting up and recovering, but better to ensure the database
|
||||
* can recover before trying anything else.
|
||||
*/
|
||||
log_error(_("database is not shut down cleanly"));
|
||||
|
||||
if (runtime_options.force_rewind_used == true)
|
||||
if (server_version_num >= 130000)
|
||||
{
|
||||
log_detail(_("pg_rewind will not be able to run"));
|
||||
log_hint(_("provide --force-rewind to run recovery"));
|
||||
}
|
||||
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
||||
else
|
||||
{
|
||||
if (runtime_options.force_rewind_used == true)
|
||||
{
|
||||
log_detail(_("pg_rewind will not be able to run"));
|
||||
}
|
||||
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
||||
}
|
||||
|
||||
exit(ERR_REJOIN_FAIL);
|
||||
}
|
||||
}
|
||||
@@ -2757,6 +2800,30 @@ do_node_rejoin(void)
|
||||
log_detail(_("pg_rewind command is \"%s\""),
|
||||
command.data);
|
||||
|
||||
/*
|
||||
* In Pg13 and later, pg_rewind will attempt to start up a server which
|
||||
* was not cleanly shut down in single user mode. This will fail if
|
||||
* "standby.signal" is present. We'll remove it and restore it after
|
||||
* pg_rewind runs.
|
||||
*/
|
||||
if (hide_standby_signal == true)
|
||||
{
|
||||
char standby_signal_file_path[MAXPGPATH] = "";
|
||||
|
||||
log_notice(_("temporarily removing \"standby.signal\""));
|
||||
log_detail(_("this is required so pg_rewind can fix the unclean shutdown"));
|
||||
|
||||
make_standby_signal_path(standby_signal_file_path);
|
||||
|
||||
if (unlink(standby_signal_file_path) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""),
|
||||
standby_signal_file_path);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_REJOIN_FAIL);
|
||||
}
|
||||
}
|
||||
|
||||
initPQExpBuffer(&command_output);
|
||||
|
||||
ret = local_command(command.data,
|
||||
@@ -2764,6 +2831,16 @@ do_node_rejoin(void)
|
||||
|
||||
termPQExpBuffer(&command);
|
||||
|
||||
if (hide_standby_signal == true)
|
||||
{
|
||||
/*
|
||||
* Restore standby.signal if we previously removed it, regardless
|
||||
* of whether the pg_rewind operation failed.
|
||||
*/
|
||||
log_notice(_("recreating \"standby.signal\""));
|
||||
write_standby_signal();
|
||||
}
|
||||
|
||||
if (ret == false)
|
||||
{
|
||||
log_error(_("unable to execute pg_rewind"));
|
||||
|
||||
Reference in New Issue
Block a user