From 5f986bc9814e3ff213ad294c938dc37ca2317178 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 13 Oct 2020 10:18:29 +0900 Subject: [PATCH] node rejoin: handle unclean shutdown in Pg13 From PostgreSQL 13, pg_rewind will automatically handle an unclean shutdown itself, so as long as --force-rewind was provided, so there is no need to fail with an error. Note that pg_rewind handles the unclean shutdown by starting PostgreSQL in single user mode, which it does before performing any checks as to whether a rewind is actually necessary. However pg_rewind doesn't take into account the possible presence of a standby.signal file, so we remove that and recreate it after pg_rewind was executed. --- HISTORY | 1 + doc/appendix-release-notes.xml | 9 ++++ doc/repmgr-node-rejoin.xml | 26 ++++++++--- repmgr-action-node.c | 83 ++++++++++++++++++++++++++++++++-- repmgr-action-standby.c | 48 -------------------- repmgr-client-global.h | 2 + repmgr-client.c | 52 +++++++++++++++++++++ 7 files changed, 163 insertions(+), 58 deletions(-) diff --git a/HISTORY b/HISTORY index 3fcbb2a7..fe3690da 100644 --- a/HISTORY +++ b/HISTORY @@ -12,6 +12,7 @@ provided to "node check" (Ian) repmgr: improve "node rejoin" checks (Ian) repmgr: enable "node rejoin" to join a target with a lower timeline (Ian) + repmgr: support pg_rewind's automatic crash recovery in Pg13 and later (Ian) repmgr: improve output formatting for cluster matrix/crosscheck (Ian) repmgr: improve database connection failure error checking on the demotion candidate during "standby switchover" (Ian) diff --git a/doc/appendix-release-notes.xml b/doc/appendix-release-notes.xml index aec0aab3..19cda520 100644 --- a/doc/appendix-release-notes.xml +++ b/doc/appendix-release-notes.xml @@ -115,6 +115,15 @@ + + + repmgr node rejoin: + in PostgreSQL 13 and later, support pg_rewind's + ability to automatically run crash recovery on a PostgreSQL instance + which was not shut down cleanly. + + + repmgr node check: diff --git a/doc/repmgr-node-rejoin.xml b/doc/repmgr-node-rejoin.xml index 8500db58..ba19f85a 100644 --- a/doc/repmgr-node-rejoin.xml +++ b/doc/repmgr-node-rejoin.xml @@ -212,9 +212,18 @@ a standby to the current primary, not another standby. - The node must have been shut down cleanly; if this was not the case, it will - need to be manually started (remove any existing recovery.conf file first) - until it has reached a consistent recovery point, then shut down cleanly. + The node's PostgreSQL instance must have been shut down cleanly. If this was not the + case, it will need to be started up until it has reached a consistent recovery point, + then shut down cleanly. + + + In PostgreSQL 13 and later, this will be done automatically + if the is provided (even if an actual rewind + is not necessary). + + + With PostgreSQL 12 and earlier, PostgreSQL will need to + be started and shut down manually; see below for the best way to do this. @@ -226,11 +235,14 @@ rm -f /var/lib/pgsql/data/recovery.conf postgres --single -D /var/lib/pgsql/data/ < /dev/null + + Note that standby.signal (PostgreSQL 11 and earlier: + recovery.conf) must be removed + from the data directory for PostgreSQL to be able to start in single + user mode. + - - &repmgr; will attempt to verify whether the node can rejoin as-is, or whether - pg_rewind must be used (see following section). - + diff --git a/repmgr-action-node.c b/repmgr-action-node.c index f9f22cfb..935bd6e0 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -2494,6 +2494,8 @@ do_node_rejoin(void) DBState db_state; PGPing status; bool is_shutdown = true; + int server_version_num = UNKNOWN_SERVER_VERSION_NUM; + bool hide_standby_signal = true; PQExpBufferData command; PQExpBufferData command_output; @@ -2538,6 +2540,21 @@ do_node_rejoin(void) exit(ERR_REJOIN_FAIL); } + /* + * Server version number required to determine whether pg_rewind will run + * crash recovery (Pg 13 and later). + */ + server_version_num = get_pg_version(config_file_options.data_directory, NULL); + + if (server_version_num == UNKNOWN_SERVER_VERSION_NUM) + { + /* This is very unlikely to happen */ + log_error(_("unable to determine database version")); + exit(ERR_BAD_CONFIG); + } + + log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num); + /* check if cleanly shut down */ if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY) { @@ -2545,15 +2562,41 @@ do_node_rejoin(void) { log_error(_("database is still shutting down")); } + else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true) + { + log_warning(_("database is not shut down cleanly")); + log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery")); + + /* + * If pg_rewind is executed, the first change it will make + * is to start the server in single user mode, which will fail + * in the presence of "standby.signal", so we'll "hide" it + * (actually delete and recreate). + */ + hide_standby_signal = true; + } else { + /* + * If the database was not shut down cleanly, it *might* rejoin correctly + * after starting up and recovering, but better to ensure the database + * can recover before trying anything else. + */ log_error(_("database is not shut down cleanly")); - if (runtime_options.force_rewind_used == true) + if (server_version_num >= 130000) { - log_detail(_("pg_rewind will not be able to run")); + log_hint(_("provide --force-rewind to run recovery")); } - log_hint(_("database should be restarted then shut down cleanly after crash recovery completes")); + else + { + if (runtime_options.force_rewind_used == true) + { + log_detail(_("pg_rewind will not be able to run")); + } + log_hint(_("database should be restarted then shut down cleanly after crash recovery completes")); + } + exit(ERR_REJOIN_FAIL); } } @@ -2757,6 +2800,30 @@ do_node_rejoin(void) log_detail(_("pg_rewind command is \"%s\""), command.data); + /* + * In Pg13 and later, pg_rewind will attempt to start up a server which + * was not cleanly shut down in single user mode. This will fail if + * "standby.signal" is present. We'll remove it and restore it after + * pg_rewind runs. + */ + if (hide_standby_signal == true) + { + char standby_signal_file_path[MAXPGPATH] = ""; + + log_notice(_("temporarily removing \"standby.signal\"")); + log_detail(_("this is required so pg_rewind can fix the unclean shutdown")); + + make_standby_signal_path(standby_signal_file_path); + + if (unlink(standby_signal_file_path) < 0 && errno != ENOENT) + { + log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""), + standby_signal_file_path); + log_detail("%s", strerror(errno)); + exit(ERR_REJOIN_FAIL); + } + } + initPQExpBuffer(&command_output); ret = local_command(command.data, @@ -2764,6 +2831,16 @@ do_node_rejoin(void) termPQExpBuffer(&command); + if (hide_standby_signal == true) + { + /* + * Restore standby.signal if we previously removed it, regardless + * of whether the pg_rewind operation failed. + */ + log_notice(_("recreating \"standby.signal\"")); + write_standby_signal(); + } + if (ret == false) { log_error(_("unable to execute pg_rewind")); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index a16fc5df..c5f26827 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -121,7 +121,6 @@ static char *make_barman_ssh_command(char *buf); static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_conninfo, int server_version_num, char *dest, bool as_file); static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list); -static bool write_standby_signal(void); static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats); static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats, bool *dry_run_success); @@ -7998,53 +7997,6 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *primary_co } -/* - * create standby.signal (PostgreSQL 12 and later) - */ - -static bool -write_standby_signal(void) -{ - char standby_signal_file_path[MAXPGPATH] = ""; - FILE *file; - mode_t um; - - snprintf(standby_signal_file_path, MAXPGPATH, - "%s/%s", - config_file_options.data_directory, - STANDBY_SIGNAL_FILE); - - /* Set umask to 0600 */ - um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO)); - file = fopen(standby_signal_file_path, "w"); - umask(um); - - if (file == NULL) - { - log_error(_("unable to create %s file at \"%s\""), - STANDBY_SIGNAL_FILE, - standby_signal_file_path); - log_detail("%s", strerror(errno)); - - return false; - } - - if (fputs("# created by repmgr\n", file) == EOF) - { - log_error(_("unable to write to %s file at \"%s\""), - STANDBY_SIGNAL_FILE, - standby_signal_file_path); - fclose(file); - - return false; - } - - fclose(file); - - return true; -} - - static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list) { diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 9d3211a1..57623a60 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -282,6 +282,8 @@ extern void get_node_config_directory(char *config_dir_buf); extern void get_node_data_directory(char *data_dir_buf); extern void init_node_record(t_node_info *node_record); extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason); +extern void make_standby_signal_path(char *buf); +extern bool write_standby_signal(void); extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg); extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name); diff --git a/repmgr-client.c b/repmgr-client.c index f1c34efb..b44ba7d5 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -3640,6 +3640,58 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea } +void +make_standby_signal_path(char *buf) +{ + snprintf(buf, MAXPGPATH, + "%s/%s", + config_file_options.data_directory, + STANDBY_SIGNAL_FILE); +} + +/* + * create standby.signal (PostgreSQL 12 and later) + */ +bool +write_standby_signal(void) +{ + char standby_signal_file_path[MAXPGPATH] = ""; + FILE *file; + mode_t um; + + make_standby_signal_path(standby_signal_file_path); + + /* Set umask to 0600 */ + um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO)); + file = fopen(standby_signal_file_path, "w"); + umask(um); + + if (file == NULL) + { + log_error(_("unable to create %s file at \"%s\""), + STANDBY_SIGNAL_FILE, + standby_signal_file_path); + log_detail("%s", strerror(errno)); + + return false; + } + + if (fputs("# created by repmgr\n", file) == EOF) + { + log_error(_("unable to write to %s file at \"%s\""), + STANDBY_SIGNAL_FILE, + standby_signal_file_path); + fclose(file); + + return false; + } + + fclose(file); + + return true; +} + + /* * NOTE: * - the provided connection should be for the normal repmgr user