From b1e544f9623b05dadff294916c178780c45ea935 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 9 Aug 2017 12:09:37 +0900 Subject: [PATCH] Enable use of pg_rewind during switchover operations But only if required and --force-rewind provided, and pg_rewind can actually be used. --- repmgr-action-node.c | 213 ++++++++++++++++++++++++++++++++++++---- repmgr-action-node.h | 2 +- repmgr-action-standby.c | 90 ++++++++++++----- repmgr-client-global.h | 5 + repmgr-client.c | 54 ++++++++-- repmgr-client.h | 19 ++-- 6 files changed, 324 insertions(+), 59 deletions(-) diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 7b673b51..55c8e8a0 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -13,6 +13,7 @@ #include "controldata.h" #include "dirutil.h" #include "dbutils.h" +#include "compat.h" #include "repmgr-client-global.h" #include "repmgr-action-node.h" @@ -903,6 +904,123 @@ parse_server_action(const char *action_name) } + +/* + * Intended mainly for "internal" use by `node switchover`, which + * calls this on the target server to execute pg_rewind on a demoted + * primary with a forked (sic) timeline. Does not currently check + * whether this is a useful thing to do. + * + * TODO: make this into a more generally useful function. 
+ */ +void +do_node_rejoin(void) +{ + PQExpBufferData command; + PQExpBufferData command_output; + struct stat statbuf; + char filebuf[MAXPGPATH]; + DBState db_state; + PGPing status; + bool is_shutdown = true; + + /* check node is not actually running */ + + status = PQping(config_file_options.conninfo); + + switch (status) + { + case PQPING_NO_ATTEMPT: + log_error(_("unable to determine status of server")); + exit(ERR_BAD_CONFIG); + case PQPING_OK: + is_shutdown = false; + break; + case PQPING_REJECT: + is_shutdown = false; + break; + case PQPING_NO_RESPONSE: + /* status not yet clear */ + break; + } + + db_state = get_db_state(config_file_options.data_directory); + + if (is_shutdown == false) + { + log_error(_("database is still running in state \"%s\""), + describe_db_state(db_state)); + exit(ERR_BAD_CONFIG); + } + + if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY) + { + log_error(_("database is not shut down cleanly, pg_rewind will not be able to run")); + exit(ERR_BAD_CONFIG); + } + + // XXX check if cleanly shut down, pg_rewind will fail if not + + + // XXX we can probably make this an internal function + do_node_archive_config(); + + + /* execute pg_rewind */ + initPQExpBuffer(&command); + + appendPQExpBuffer( + &command, + "%s -D ", + make_pg_path("pg_rewind")); + + appendShellString( + &command, + config_file_options.data_directory); + + appendPQExpBuffer( + &command, + " --source-server='%s'", + runtime_options.upstream_conninfo); + + log_notice(_("executing pg_rewind")); + log_debug("pg_rewind command is:\n %s", + command.data); + + initPQExpBuffer(&command_output); + + // XXX handle failure + + (void)local_command( + command.data, + &command_output); + + termPQExpBuffer(&command_output); + termPQExpBuffer(&command); + + /* Restore any previously archived config files */ + do_node_restore_config(); + + + /* remove any recovery.done file copied in by pg_rewind */ + snprintf(filebuf, MAXPGPATH, + "%s/recovery.done", + 
config_file_options.data_directory); + + if (stat(filebuf, &statbuf) == 0) + { + log_verbose(LOG_INFO, _("deleting \"recovery.done\"")); + + if (unlink(filebuf) == -1) + { + log_warning(_("unable to delete \"%s\""), + filebuf); + log_detail("%s", strerror(errno)); + } + } + +} + /* * Intended mainly for "internal" use by `node switchover`, which * calls this on the target server to archive any configuration files @@ -919,7 +1037,7 @@ do_node_archive_config(void) struct dirent *arcdir_ent; DIR *arcdir; - PGconn *local_conn = NULL; + KeyValueList config_files = { NULL, NULL }; KeyValueListCell *cell; int copied_count = 0; @@ -988,8 +1106,8 @@ do_node_archive_config(void) if (unlink(arcdir_ent_path) == -1) { - log_error(_("unable to create temporary archive directory \"%s\""), - archive_dir); + log_error(_("unable to delete file in temporary archive directory")); + log_detail(_("file is: \"%s\""), arcdir_ent_path); log_detail("%s", strerror(errno)); closedir(arcdir); exit(ERR_BAD_CONFIG); @@ -998,9 +1116,57 @@ do_node_archive_config(void) closedir(arcdir); - local_conn = establish_db_connection(config_file_options.conninfo, true); + /* + * extract the comma-separated list of config file names from --config-files + */ + { + int i = 0, j; + int config_file_len = strlen(runtime_options.config_files); + + char filenamebuf[MAXLEN] = ""; + char pathbuf[MAXPGPATH] = ""; + + for (j = 0; j < config_file_len; j++) + { + if (runtime_options.config_files[j] == ',') + { + int filename_len = j - i; + + if (filename_len >= MAXLEN) /* leave room for the terminating NUL */ + filename_len = MAXLEN - 1; + + strncpy(filenamebuf, runtime_options.config_files + i, filename_len); + + filenamebuf[filename_len] = '\0'; + + snprintf(pathbuf, MAXPGPATH, + "%s/%s", + config_file_options.data_directory, + filenamebuf); + + key_value_list_set( + &config_files, + filenamebuf, + pathbuf); + + i = j + 1; + } + } + + if (i < config_file_len) + { + snprintf(filenamebuf, MAXLEN, "%s", runtime_options.config_files + i); /* unlike strncpy(), always NUL-terminates, so no stale tail from a longer previous entry */ + snprintf(pathbuf, MAXPGPATH, + "%s/%s", + 
config_file_options.data_directory, + filenamebuf); + key_value_list_set( + &config_files, + filenamebuf, + pathbuf); + } + } - get_datadir_configuration_files(local_conn, &config_files); for (cell = config_files.head; cell; cell = cell->next) { @@ -1010,14 +1176,22 @@ do_node_archive_config(void) "%s/%s", archive_dir, cell->key); - - copy_file(cell->value, dest_file); - copied_count++; + if (stat(cell->value, &statbuf) == -1) + { + log_warning(_("specified file \"%s\" not found, skipping"), + cell->value); + } + else + { + log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"", + cell->key, dest_file); + copy_file(cell->value, dest_file); + copied_count++; + } } - PQfinish(local_conn); - log_verbose(LOG_INFO, _("%i files copied to %s"), + log_verbose(LOG_INFO, _("%i files copied to \"%s\""), copied_count, archive_dir); } @@ -1077,10 +1251,11 @@ do_node_restore_config(void) snprintf(dest_file_path, MAXPGPATH, "%s/%s", - runtime_options.data_dir, + config_file_options.data_directory, arcdir_ent->d_name); - log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"", src_file_path, dest_file_path); + log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"", + src_file_path, dest_file_path); if (copy_file(src_file_path, dest_file_path) == false) { @@ -1097,27 +1272,29 @@ do_node_restore_config(void) } closedir(arcdir); + log_notice(_("%i files copied to %s"), + copied_count, + config_file_options.data_directory); if (copy_ok == false) { - log_error(_("unable to copy all files from %s"), archive_dir); + log_error(_("unable to copy all files from \"%s\""), archive_dir); exit(ERR_BAD_CONFIG); } - log_notice(_("%i files copied to %s"), copied_count, runtime_options.data_dir); - /* * Finally, delete directory - it should be empty unless it's been interfered * with for some reason, in which case manual intervention is required */ if (rmdir(archive_dir) != 0 && errno != EEXIST) { - log_warning(_("unable to delete %s"), archive_dir); - log_detail(_("directory may need to be manually removed")); + 
log_warning(_("unable to delete directory \"%s\""), archive_dir); + log_detail("%s", strerror(errno)); + log_hint(_("directory may need to be manually removed")); } else { - log_verbose(LOG_NOTICE, "directory %s deleted", archive_dir); + log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir); } return; diff --git a/repmgr-action-node.h b/repmgr-action-node.h index 75120343..810a4de8 100644 --- a/repmgr-action-node.h +++ b/repmgr-action-node.h @@ -11,7 +11,7 @@ extern void do_node_check(void); extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output); extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output); - +extern void do_node_rejoin(void); extern void do_node_archive_config(void); extern void do_node_restore_config(void); extern void do_node_service(void); diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index bec51439..3ce89034 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1598,6 +1598,8 @@ do_standby_switchover(void) XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr; ReplInfo replication_info = T_REPLINFO_INTIALIZER; + /* store list of configuration files on the demotion candidate */ + KeyValueList remote_config_files = { NULL, NULL }; /* * SANITY CHECKS @@ -1701,7 +1703,8 @@ do_standby_switchover(void) /* - * If --force-rewind specified, check pg_rewind can be used + * If --force-rewind specified, check pg_rewind can be used, and pre-emptively + * fetch the list of configuration files which should be archived */ if (runtime_options.force_rewind == true) @@ -1722,6 +1725,8 @@ do_standby_switchover(void) } termPQExpBuffer(&reason); + + get_datadir_configuration_files(remote_conn, &remote_config_files); } @@ -2153,33 +2158,72 @@ do_standby_switchover(void) } /* promote standby */ - _do_standby_promote_internal(config_file_options.data_directory); - if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) + 
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) /* rewind only if the nodes have diverged */ { - // if --force-rewind was supplied, do that now, otherwise exit + KeyValueListCell *cell; + bool first_entry = true; + + if (runtime_options.force_rewind == false) + { + log_error(_("new primary diverges from former primary and --force-rewind not provided")); + /* TODO: "repmgr node rejoin" example, when available */ + log_hint(_("the former primary will need to be restored manually")); + PQfinish(local_conn); + exit(ERR_SWITCHOVER_FAIL); + } + + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + "node rejoin --upstream-conninfo='%s'", + local_node_record.conninfo); + appendPQExpBuffer(&remote_command_str, + " --config-files="); + + for (cell = remote_config_files.head; cell; cell = cell->next) + { + if (first_entry == false) + appendPQExpBuffer(&remote_command_str, ","); + else + first_entry = false; + + appendPQExpBuffer(&remote_command_str, "%s", cell->key); + } + + log_debug("executing:\n \"%s\"", remote_command_str.data); + + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + NULL); + + termPQExpBuffer(&remote_command_str); } + else + { + /* + * Execute `repmgr standby follow` to create recovery.conf and start + * the remote server + * + * XXX replace with "node rejoin" + */ + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str); + appendPQExpBuffer(&remote_command_str, + " -d \\'%s\\' standby follow", + local_node_record.conninfo); + log_debug("executing:\n \"%s\"", remote_command_str.data); + (void)remote_command( + remote_host, + runtime_options.remote_user, + remote_command_str.data, + NULL); - /* - * Execute `repmgr standby follow` to create recovery.conf and start - * the remote server - * - * XXX replace with "node rejoin" - */ - initPQExpBuffer(&remote_command_str); - make_remote_repmgr_path(&remote_command_str); - 
appendPQExpBuffer(&remote_command_str, - " -d \\'%s\\' standby follow", - local_node_record.conninfo); - log_debug("executing:\n \"%s\"", remote_command_str.data); - (void)remote_command( - remote_host, - runtime_options.remote_user, - remote_command_str.data, - NULL); - - termPQExpBuffer(&remote_command_str); + termPQExpBuffer(&remote_command_str); + } /* TODO: verify this node's record was updated correctly */ diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 37999b15..bca484c7 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -94,6 +94,9 @@ typedef struct bool archiver; bool replication_lag; + /* "node rejoin" options */ + char config_files[MAXLEN]; + /* "node service" options */ char action[MAXLEN]; bool check; @@ -136,6 +139,8 @@ typedef struct false, \ /* "node check" options */ \ false, false, \ + /* "node rejoin" options */ \ + "", \ /* "node service" options */ \ "", false, false, false, \ /* "cluster event" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index 3a31a0ef..9fa54e05 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -30,6 +30,7 @@ * NODE CHECK * * For internal use: + * NODE REJOIN * NODE ARCHIVE-CONFIG * NODE RESTORE-CONFIG * NODE SERVICE @@ -431,6 +432,12 @@ main(int argc, char **argv) runtime_options.replication_lag = true; break; + /* "node rejoin" options * + * --------------------- */ + case OPT_CONFIG_FILES: + strncpy(runtime_options.config_files, optarg, MAXLEN); + break; + /* "node service" options * * ---------------------- */ @@ -644,7 +651,7 @@ main(int argc, char **argv) * { PRIMARY | MASTER } REGISTER | * STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} | * BDR { REGISTER | UNREGISTER } | - * NODE { STATUS | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } | + * NODE { STATUS | CHECK | REJOIN | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } | * CLUSTER { CROSSCHECK | MATRIX | SHOW | CLEANUP | EVENT } * * [node] is an optional hostname, provided 
instead of the -h/--host optipn @@ -718,6 +725,8 @@ main(int argc, char **argv) action = NODE_CHECK; else if (strcasecmp(repmgr_action, "STATUS") == 0) action = NODE_STATUS; + else if (strcasecmp(repmgr_action, "REJOIN") == 0) + action = NODE_REJOIN; else if (strcasecmp(repmgr_action, "ARCHIVE-CONFIG") == 0) action = NODE_ARCHIVE_CONFIG; else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0) @@ -1046,6 +1055,9 @@ main(int argc, char **argv) case NODE_CHECK: do_node_check(); break; + case NODE_REJOIN: + do_node_rejoin(); + break; case NODE_ARCHIVE_CONFIG: do_node_archive_config(); break; @@ -1378,9 +1390,10 @@ check_cli_parameters(const int action) case NODE_STATUS: break; default: - item_list_append_format(&cli_warnings, - _("--is-shutdown will be ignored when executing %s"), - action_name(action)); + item_list_append_format( + &cli_warnings, + _("--is-shutdown will be ignored when executing %s"), + action_name(action)); } } @@ -1391,9 +1404,10 @@ check_cli_parameters(const int action) case STANDBY_SWITCHOVER: break; default: - item_list_append_format(&cli_warnings, + item_list_append_format( + &cli_warnings, _("--always-promote will be ignored when executing %s"), - action_name(action)); + action_name(action)); } } @@ -1404,9 +1418,25 @@ check_cli_parameters(const int action) case STANDBY_SWITCHOVER: break; default: - item_list_append_format(&cli_warnings, + item_list_append_format( + &cli_warnings, _("--force-rewind will be ignored when executing %s"), - action_name(action)); + action_name(action)); + } + } + + + if (runtime_options.config_files[0] != '\0') + { + switch (action) + { + case NODE_REJOIN: + break; + default: + item_list_append_format( + &cli_warnings, + _("--config-files will be ignored when executing %s"), + action_name(action)); } } @@ -1426,8 +1456,9 @@ check_cli_parameters(const int action) if (used_options > 1) { /* TODO: list which options were used */ - item_list_append(&cli_errors, - "only one of --csv, --nagios and --optformat can be 
used"); + item_list_append( + &cli_errors, + "only one of --csv, --nagios and --optformat can be used"); } } } @@ -1463,6 +1494,8 @@ action_name(const int action) return "NODE STATUS"; case NODE_CHECK: return "NODE CHECK"; + case NODE_REJOIN: + return "NODE REJOIN"; case NODE_ARCHIVE_CONFIG: return "NODE ARCHIVE-CONFIG"; case NODE_RESTORE_CONFIG: @@ -2849,6 +2882,7 @@ get_server_action(t_server_action action, char *script, char *data_dir) return; } + bool data_dir_required_for_action(t_server_action action) { diff --git a/repmgr-client.h b/repmgr-client.h index 3fa2f752..220ffbd2 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -25,13 +25,14 @@ #define NODE_STATUS 11 #define NODE_CHECK 12 #define NODE_SERVICE 13 -#define NODE_ARCHIVE_CONFIG 14 -#define NODE_RESTORE_CONFIG 15 -#define CLUSTER_SHOW 16 -#define CLUSTER_CLEANUP 17 -#define CLUSTER_MATRIX 18 -#define CLUSTER_CROSSCHECK 19 -#define CLUSTER_EVENT 20 +#define NODE_REJOIN 14 +#define NODE_ARCHIVE_CONFIG 15 +#define NODE_RESTORE_CONFIG 16 +#define CLUSTER_SHOW 17 +#define CLUSTER_CLEANUP 18 +#define CLUSTER_MATRIX 19 +#define CLUSTER_CROSSCHECK 20 +#define CLUSTER_EVENT 21 /* command line options without short versions */ #define OPT_HELP 1001 @@ -69,6 +70,7 @@ #define OPT_ARCHIVER 1032 #define OPT_OPTFORMAT 1033 #define OPT_REPLICATION_LAG 1034 +#define OPT_CONFIG_FILES 1035 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 #define OPT_NO_CONNINFO_PASSWORD 998 @@ -138,6 +140,9 @@ static struct option long_options[] = {"archiver", no_argument, NULL, OPT_ARCHIVER }, {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG }, +/* "node rejoin" options */ + {"config-files", required_argument, NULL, OPT_CONFIG_FILES }, + /* "node service" options */ {"action", required_argument, NULL, OPT_ACTION}, {"check", no_argument, NULL, OPT_CHECK},