Enable use of pg_rewind during switchover operations

But only if a rewind is actually required, --force-rewind was provided,
and pg_rewind can actually be used.
This commit is contained in:
Ian Barwick
2017-08-09 12:09:37 +09:00
parent 2553839630
commit b1e544f962
6 changed files with 324 additions and 59 deletions

View File

@@ -13,6 +13,7 @@
#include "controldata.h" #include "controldata.h"
#include "dirutil.h" #include "dirutil.h"
#include "dbutils.h" #include "dbutils.h"
#include "compat.h"
#include "repmgr-client-global.h" #include "repmgr-client-global.h"
#include "repmgr-action-node.h" #include "repmgr-action-node.h"
@@ -903,6 +904,123 @@ parse_server_action(const char *action_name)
} }
/*
 * Intended mainly for "internal" use by `node switchover`, which
 * calls this on the target server to execute pg_rewind on a demoted
 * primary with a forked (sic) timeline. Does not currently check
 * whether this is a useful thing to do.
 *
 * Sequence of operations:
 *  1. verify the local node is not accepting or rejecting connections
 *     (i.e. is not running) and that its control file reports a clean
 *     shutdown;
 *  2. archive the configuration files (do_node_archive_config());
 *  3. run pg_rewind against the server given by --upstream-conninfo;
 *  4. restore the archived configuration files
 *     (do_node_restore_config());
 *  5. remove any "recovery.done" file present in the data directory.
 *
 * Exits with ERR_BAD_CONFIG if the node's status cannot be determined,
 * if it is running, if it was not shut down cleanly, or if pg_rewind
 * could not be executed.
 *
 * TODO: make this into a more generally useful function.
 */
void
do_node_rejoin(void)
{
	PQExpBufferData command;
	PQExpBufferData command_output;
	struct stat statbuf;
	char		filebuf[MAXPGPATH];
	DBState		db_state;
	PGPing		status;
	bool		is_shutdown = true;
	bool		rewind_ok;

	/* check node is not actually running */
	status = PQping(config_file_options.conninfo);

	switch (status)
	{
		case PQPING_NO_ATTEMPT:
			log_error(_("unable to determine status of server"));
			exit(ERR_BAD_CONFIG);
		case PQPING_OK:
			is_shutdown = false;
			break;
		case PQPING_REJECT:
			/* server is up, even though it is refusing connections */
			is_shutdown = false;
			break;
		case PQPING_NO_RESPONSE:
			/* status not yet clear; rely on the control file state below */
			break;
		default:
			/* unknown ping result: treat like "no response" */
			break;
	}

	/* fetched before the "is running" check as it is used in the message */
	db_state = get_db_state(config_file_options.data_directory);

	if (is_shutdown == false)
	{
		log_error(_("database is still running in state \"%s\""),
				  describe_db_state(db_state));
		exit(ERR_BAD_CONFIG);
	}

	if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
	{
		log_error(_("database is not shut down cleanly, pg_rewind will not be able to run"));
		exit(ERR_BAD_CONFIG);
	}

	/*
	 * Archive configuration files so they can be put back once pg_rewind
	 * has finished.
	 *
	 * XXX we can probably make this an internal function
	 */
	do_node_archive_config();

	/* execute pg_rewind */
	initPQExpBuffer(&command);

	appendPQExpBuffer(
		&command,
		"%s -D ",
		make_pg_path("pg_rewind"));

	appendShellString(
		&command,
		config_file_options.data_directory);

	/*
	 * Quote the conninfo string the same way as the data directory, rather
	 * than wrapping it in literal single quotes: a conninfo string may itself
	 * contain single-quoted values, which would otherwise terminate the
	 * shell quoting early.
	 */
	appendPQExpBuffer(
		&command,
		" --source-server=");

	appendShellString(
		&command,
		runtime_options.upstream_conninfo);

	log_notice(_("executing pg_rewind"));
	log_debug("pg_rewind command is:\n  %s",
			  command.data);

	initPQExpBuffer(&command_output);

	/* NOTE(review): assumes local_command() returns false on failure - confirm */
	rewind_ok = local_command(
		command.data,
		&command_output);

	if (rewind_ok == false)
	{
		log_error(_("unable to execute pg_rewind"));
		log_detail("%s", command_output.data);
	}

	termPQExpBuffer(&command_output);
	termPQExpBuffer(&command);

	/*
	 * Restore any previously archived config files; this is done even if
	 * pg_rewind failed, so the archived copies are not left orphaned.
	 */
	do_node_restore_config();

	if (rewind_ok == false)
	{
		exit(ERR_BAD_CONFIG);
	}

	/* remove any recovery.done file copied in by pg_rewind */
	snprintf(filebuf, MAXPGPATH,
			 "%s/recovery.done",
			 config_file_options.data_directory);

	if (stat(filebuf, &statbuf) == 0)
	{
		log_verbose(LOG_INFO, _("deleting \"recovery.done\""));

		if (unlink(filebuf) == -1)
		{
			/* not fatal: warn and report the reason */
			log_warning(_("unable to delete \"%s\""),
						filebuf);
			log_detail("%s", strerror(errno));
		}
	}
}
/* /*
* Intended mainly for "internal" use by `node switchover`, which * Intended mainly for "internal" use by `node switchover`, which
* calls this on the target server to archive any configuration files * calls this on the target server to archive any configuration files
@@ -919,7 +1037,7 @@ do_node_archive_config(void)
struct dirent *arcdir_ent; struct dirent *arcdir_ent;
DIR *arcdir; DIR *arcdir;
PGconn *local_conn = NULL;
KeyValueList config_files = { NULL, NULL }; KeyValueList config_files = { NULL, NULL };
KeyValueListCell *cell; KeyValueListCell *cell;
int copied_count = 0; int copied_count = 0;
@@ -988,8 +1106,8 @@ do_node_archive_config(void)
if (unlink(arcdir_ent_path) == -1) if (unlink(arcdir_ent_path) == -1)
{ {
log_error(_("unable to create temporary archive directory \"%s\""), log_error(_("unable to delete file in temporary archive directory"));
archive_dir); log_detail(_("file is: \"%s\""), arcdir_ent_path);
log_detail("%s", strerror(errno)); log_detail("%s", strerror(errno));
closedir(arcdir); closedir(arcdir);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
@@ -998,9 +1116,57 @@ do_node_archive_config(void)
closedir(arcdir); closedir(arcdir);
local_conn = establish_db_connection(config_file_options.conninfo, true); /*
* extract list of config files from --config-files
*/
{
	/*
	 * Split the comma-separated --config-files value into individual file
	 * names and register each one in "config_files", keyed by file name
	 * with the file's path inside the data directory as the value.
	 *
	 * Each name is copied with an explicit length and always
	 * NUL-terminated, so a short trailing entry cannot inherit leftover
	 * bytes from a longer earlier one. Empty entries (leading, trailing or
	 * doubled commas) are skipped, as they would otherwise resolve to the
	 * data directory itself.
	 */
	const char *entry_start = runtime_options.config_files;
	char		filenamebuf[MAXLEN] = "";
	char		pathbuf[MAXPGPATH] = "";

	while (*entry_start != '\0')
	{
		const char *entry_end = strchr(entry_start, ',');
		size_t		entry_len;

		/* the last entry runs to the end of the string */
		if (entry_end != NULL)
			entry_len = (size_t) (entry_end - entry_start);
		else
			entry_len = strlen(entry_start);

		if (entry_len > 0)
		{
			/* leave room for the terminating NUL */
			if (entry_len >= MAXLEN)
				entry_len = MAXLEN - 1;

			memcpy(filenamebuf, entry_start, entry_len);
			filenamebuf[entry_len] = '\0';

			snprintf(pathbuf, MAXPGPATH,
					 "%s/%s",
					 config_file_options.data_directory,
					 filenamebuf);

			key_value_list_set(
				&config_files,
				filenamebuf,
				pathbuf);
		}

		if (entry_end == NULL)
			break;

		/* step over the comma to the start of the next entry */
		entry_start = entry_end + 1;
	}
}
get_datadir_configuration_files(local_conn, &config_files);
for (cell = config_files.head; cell; cell = cell->next) for (cell = config_files.head; cell; cell = cell->next)
{ {
@@ -1010,14 +1176,22 @@ do_node_archive_config(void)
"%s/%s", "%s/%s",
archive_dir, archive_dir,
cell->key); cell->key);
if (stat(cell->value, &statbuf) == -1)
{
log_warning(_("specified file \"%s\" not found, skipping"),
cell->value);
}
else
{
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
cell->key, dest_file);
copy_file(cell->value, dest_file); copy_file(cell->value, dest_file);
copied_count++; copied_count++;
} }
}
PQfinish(local_conn);
log_verbose(LOG_INFO, _("%i files copied to %s"), log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
copied_count, archive_dir); copied_count, archive_dir);
} }
@@ -1077,10 +1251,11 @@ do_node_restore_config(void)
snprintf(dest_file_path, MAXPGPATH, snprintf(dest_file_path, MAXPGPATH,
"%s/%s", "%s/%s",
runtime_options.data_dir, config_file_options.data_directory,
arcdir_ent->d_name); arcdir_ent->d_name);
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"", src_file_path, dest_file_path); log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
src_file_path, dest_file_path);
if (copy_file(src_file_path, dest_file_path) == false) if (copy_file(src_file_path, dest_file_path) == false)
{ {
@@ -1097,27 +1272,29 @@ do_node_restore_config(void)
} }
closedir(arcdir); closedir(arcdir);
log_notice(_("%i files copied to %s"),
copied_count,
config_file_options.data_directory);
if (copy_ok == false) if (copy_ok == false)
{ {
log_error(_("unable to copy all files from %s"), archive_dir); log_error(_("unable to copy all files from \"%s\""), archive_dir);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
log_notice(_("%i files copied to %s"), copied_count, runtime_options.data_dir);
/* /*
* Finally, delete directory - it should be empty unless it's been interfered * Finally, delete directory - it should be empty unless it's been interfered
* with for some reason, in which case manual intervention is required * with for some reason, in which case manual intervention is required
*/ */
if (rmdir(archive_dir) != 0 && errno != EEXIST) if (rmdir(archive_dir) != 0 && errno != EEXIST)
{ {
log_warning(_("unable to delete %s"), archive_dir); log_warning(_("unable to delete directory \"%s\""), archive_dir);
log_detail(_("directory may need to be manually removed")); log_detail("%s", strerror(errno));
log_hint(_("directory may need to be manually removed"));
} }
else else
{ {
log_verbose(LOG_NOTICE, "directory %s deleted", archive_dir); log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir);
} }
return; return;

View File

@@ -11,7 +11,7 @@ extern void do_node_check(void);
extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output); extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output);
extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output); extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output);
extern void do_node_rejoin(void);
extern void do_node_archive_config(void); extern void do_node_archive_config(void);
extern void do_node_restore_config(void); extern void do_node_restore_config(void);
extern void do_node_service(void); extern void do_node_service(void);

View File

@@ -1598,6 +1598,8 @@ do_standby_switchover(void)
XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr; XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr;
ReplInfo replication_info = T_REPLINFO_INTIALIZER; ReplInfo replication_info = T_REPLINFO_INTIALIZER;
/* store list of configuration files on the demotion candidate */
KeyValueList remote_config_files = { NULL, NULL };
/* /*
* SANITY CHECKS * SANITY CHECKS
@@ -1701,7 +1703,8 @@ do_standby_switchover(void)
/* /*
* If --force-rewind specified, check pg_rewind can be used * If --force-rewind specified, check pg_rewind can be used, and pre-emptively
* fetch the list of configuration files which should be archived
*/ */
if (runtime_options.force_rewind == true) if (runtime_options.force_rewind == true)
@@ -1722,6 +1725,8 @@ do_standby_switchover(void)
} }
termPQExpBuffer(&reason); termPQExpBuffer(&reason);
get_datadir_configuration_files(remote_conn, &remote_config_files);
} }
@@ -2153,14 +2158,52 @@ do_standby_switchover(void)
} }
/* promote standby */ /* promote standby */
_do_standby_promote_internal(config_file_options.data_directory); _do_standby_promote_internal(config_file_options.data_directory);
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn) if (1 || replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
{ {
// if --force-rewind was supplied, do that now, otherwise exit KeyValueListCell *cell;
bool first_entry = true;
if (runtime_options.force_rewind == false)
{
log_error(_("new primary diverges from former primary and --force-rewind not provided"));
/* TODO: "repmgr node rejoin" example, when available */
log_hint(_("the former primary will need to be restored manually"));
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
} }
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str);
appendPQExpBuffer(&remote_command_str,
"node rejoin --upstream-conninfo='%s'",
local_node_record.conninfo);
appendPQExpBuffer(&remote_command_str,
" --config-files=");
for (cell = remote_config_files.head; cell; cell = cell->next)
{
if (first_entry == false)
appendPQExpBuffer(&remote_command_str, ",");
else
first_entry = false;
appendPQExpBuffer(&remote_command_str, "%s", cell->key);
}
log_debug("executing:\n \"%s\"", remote_command_str.data);
(void)remote_command(
remote_host,
runtime_options.remote_user,
remote_command_str.data,
NULL);
termPQExpBuffer(&remote_command_str);
}
else
{
/* /*
* Execute `repmgr standby follow` to create recovery.conf and start * Execute `repmgr standby follow` to create recovery.conf and start
* the remote server * the remote server
@@ -2180,6 +2223,7 @@ do_standby_switchover(void)
NULL); NULL);
termPQExpBuffer(&remote_command_str); termPQExpBuffer(&remote_command_str);
}
/* TODO: verify this node's record was updated correctly */ /* TODO: verify this node's record was updated correctly */

View File

@@ -94,6 +94,9 @@ typedef struct
bool archiver; bool archiver;
bool replication_lag; bool replication_lag;
/* "node rejoin" options */
char config_files[MAXLEN];
/* "node service" options */ /* "node service" options */
char action[MAXLEN]; char action[MAXLEN];
bool check; bool check;
@@ -136,6 +139,8 @@ typedef struct
false, \ false, \
/* "node check" options */ \ /* "node check" options */ \
false, false, \ false, false, \
/* "node rejoin" options */ \
"", \
/* "node service" options */ \ /* "node service" options */ \
"", false, false, false, \ "", false, false, false, \
/* "cluster event" options */ \ /* "cluster event" options */ \

View File

@@ -30,6 +30,7 @@
* NODE CHECK * NODE CHECK
* *
* For internal use: * For internal use:
* NODE REJOIN
* NODE ARCHIVE-CONFIG * NODE ARCHIVE-CONFIG
* NODE RESTORE-CONFIG * NODE RESTORE-CONFIG
* NODE SERVICE * NODE SERVICE
@@ -431,6 +432,12 @@ main(int argc, char **argv)
runtime_options.replication_lag = true; runtime_options.replication_lag = true;
break; break;
/* "node rejoin" options *
* --------------------- */
case OPT_CONFIG_FILES:
strncpy(runtime_options.config_files, optarg, MAXLEN);
break;
/* "node service" options * /* "node service" options *
* ---------------------- */ * ---------------------- */
@@ -644,7 +651,7 @@ main(int argc, char **argv)
* { PRIMARY | MASTER } REGISTER | * { PRIMARY | MASTER } REGISTER |
* STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} | * STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} |
* BDR { REGISTER | UNREGISTER } | * BDR { REGISTER | UNREGISTER } |
* NODE { STATUS | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } | * NODE { STATUS | CHECK | REJOIN | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } |
* CLUSTER { CROSSCHECK | MATRIX | SHOW | CLEANUP | EVENT } * CLUSTER { CROSSCHECK | MATRIX | SHOW | CLEANUP | EVENT }
* *
* [node] is an optional hostname, provided instead of the -h/--host option
@@ -718,6 +725,8 @@ main(int argc, char **argv)
action = NODE_CHECK; action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0) else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS; action = NODE_STATUS;
else if (strcasecmp(repmgr_action, "REJOIN") == 0)
action = NODE_REJOIN;
else if (strcasecmp(repmgr_action, "ARCHIVE-CONFIG") == 0) else if (strcasecmp(repmgr_action, "ARCHIVE-CONFIG") == 0)
action = NODE_ARCHIVE_CONFIG; action = NODE_ARCHIVE_CONFIG;
else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0) else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0)
@@ -1046,6 +1055,9 @@ main(int argc, char **argv)
case NODE_CHECK: case NODE_CHECK:
do_node_check(); do_node_check();
break; break;
case NODE_REJOIN:
do_node_rejoin();
break;
case NODE_ARCHIVE_CONFIG: case NODE_ARCHIVE_CONFIG:
do_node_archive_config(); do_node_archive_config();
break; break;
@@ -1378,7 +1390,8 @@ check_cli_parameters(const int action)
case NODE_STATUS: case NODE_STATUS:
break; break;
default: default:
item_list_append_format(&cli_warnings, item_list_append_format(
&cli_warnings,
_("--is-shutdown will be ignored when executing %s"), _("--is-shutdown will be ignored when executing %s"),
action_name(action)); action_name(action));
} }
@@ -1391,7 +1404,8 @@ check_cli_parameters(const int action)
case STANDBY_SWITCHOVER: case STANDBY_SWITCHOVER:
break; break;
default: default:
item_list_append_format(&cli_warnings, item_list_append_format(
&cli_warnings,
_("--always-promote will be ignored when executing %s"), _("--always-promote will be ignored when executing %s"),
action_name(action)); action_name(action));
} }
@@ -1404,12 +1418,28 @@ check_cli_parameters(const int action)
case STANDBY_SWITCHOVER: case STANDBY_SWITCHOVER:
break; break;
default: default:
item_list_append_format(&cli_warnings, item_list_append_format(
&cli_warnings,
_("--force-rewind will be ignored when executing %s"), _("--force-rewind will be ignored when executing %s"),
action_name(action)); action_name(action));
} }
} }
if (runtime_options.config_files[0] != '\0')
{
switch (action)
{
case NODE_REJOIN:
break;
default:
item_list_append_format(
&cli_warnings,
_("--config-files will be ignored when executing %s"),
action_name(action));
}
}
/* check only one of --csv, --nagios and --optformat used */ /* check only one of --csv, --nagios and --optformat used */
{ {
int used_options = 0; int used_options = 0;
@@ -1426,7 +1456,8 @@ check_cli_parameters(const int action)
if (used_options > 1) if (used_options > 1)
{ {
/* TODO: list which options were used */ /* TODO: list which options were used */
item_list_append(&cli_errors, item_list_append(
&cli_errors,
"only one of --csv, --nagios and --optformat can be used"); "only one of --csv, --nagios and --optformat can be used");
} }
} }
@@ -1463,6 +1494,8 @@ action_name(const int action)
return "NODE STATUS"; return "NODE STATUS";
case NODE_CHECK: case NODE_CHECK:
return "NODE CHECK"; return "NODE CHECK";
case NODE_REJOIN:
return "NODE REJOIN";
case NODE_ARCHIVE_CONFIG: case NODE_ARCHIVE_CONFIG:
return "NODE ARCHIVE-CONFIG"; return "NODE ARCHIVE-CONFIG";
case NODE_RESTORE_CONFIG: case NODE_RESTORE_CONFIG:
@@ -2849,6 +2882,7 @@ get_server_action(t_server_action action, char *script, char *data_dir)
return; return;
} }
bool bool
data_dir_required_for_action(t_server_action action) data_dir_required_for_action(t_server_action action)
{ {

View File

@@ -25,13 +25,14 @@
#define NODE_STATUS 11 #define NODE_STATUS 11
#define NODE_CHECK 12 #define NODE_CHECK 12
#define NODE_SERVICE 13 #define NODE_SERVICE 13
#define NODE_ARCHIVE_CONFIG 14 #define NODE_REJOIN 14
#define NODE_RESTORE_CONFIG 15 #define NODE_ARCHIVE_CONFIG 15
#define CLUSTER_SHOW 16 #define NODE_RESTORE_CONFIG 16
#define CLUSTER_CLEANUP 17 #define CLUSTER_SHOW 17
#define CLUSTER_MATRIX 18 #define CLUSTER_CLEANUP 18
#define CLUSTER_CROSSCHECK 19 #define CLUSTER_MATRIX 19
#define CLUSTER_EVENT 20 #define CLUSTER_CROSSCHECK 20
#define CLUSTER_EVENT 21
/* command line options without short versions */ /* command line options without short versions */
#define OPT_HELP 1001 #define OPT_HELP 1001
@@ -69,6 +70,7 @@
#define OPT_ARCHIVER 1032 #define OPT_ARCHIVER 1032
#define OPT_OPTFORMAT 1033 #define OPT_OPTFORMAT 1033
#define OPT_REPLICATION_LAG 1034 #define OPT_REPLICATION_LAG 1034
#define OPT_CONFIG_FILES 1035
/* deprecated since 3.3 */ /* deprecated since 3.3 */
#define OPT_DATA_DIR 999 #define OPT_DATA_DIR 999
#define OPT_NO_CONNINFO_PASSWORD 998 #define OPT_NO_CONNINFO_PASSWORD 998
@@ -138,6 +140,9 @@ static struct option long_options[] =
{"archiver", no_argument, NULL, OPT_ARCHIVER }, {"archiver", no_argument, NULL, OPT_ARCHIVER },
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG }, {"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG },
/* "node rejoin" options */
{"config-files", required_argument, NULL, OPT_CONFIG_FILES },
/* "node service" options */ /* "node service" options */
{"action", required_argument, NULL, OPT_ACTION}, {"action", required_argument, NULL, OPT_ACTION},
{"check", no_argument, NULL, OPT_CHECK}, {"check", no_argument, NULL, OPT_CHECK},