Enable use of pg_rewind during switchover operations

But only if a rewind is actually required, --force-rewind was provided,
and pg_rewind can actually be used.
This commit is contained in:
Ian Barwick
2017-08-09 12:09:37 +09:00
parent 2553839630
commit b1e544f962
6 changed files with 324 additions and 59 deletions

View File

@@ -13,6 +13,7 @@
#include "controldata.h"
#include "dirutil.h"
#include "dbutils.h"
#include "compat.h"
#include "repmgr-client-global.h"
#include "repmgr-action-node.h"
@@ -903,6 +904,123 @@ parse_server_action(const char *action_name)
}
/*
 * Intended mainly for "internal" use by `node switchover`, which
 * calls this on the target server to execute pg_rewind on a demoted
 * primary with a forked (sic) timeline. Does not currently check
 * whether this is a useful thing to do.
 *
 * Sequence of operations:
 *  1. verify the local server is not running (PQping) and that the
 *     state returned by get_db_state() is one of the "shut down" states
 *  2. archive configuration files with do_node_archive_config()
 *  3. execute pg_rewind, using the connection string supplied in
 *     runtime_options.upstream_conninfo as the source server
 *  4. restore the archived configuration files with
 *     do_node_restore_config()
 *  5. delete any "recovery.done" file present in the data directory
 *
 * Exits with ERR_BAD_CONFIG if the server is running, was not shut down
 * cleanly, or pg_rewind could not be executed.
 *
 * TODO: make this into a more generally useful function.
 */
void
do_node_rejoin(void)
{
	PQExpBufferData command;
	PQExpBufferData command_output;
	struct stat statbuf;
	char		filebuf[MAXPGPATH];
	DBState		db_state;
	PGPing		status;
	bool		is_shutdown = true;
	bool		rewind_ok;

	/* check node is not actually running */
	status = PQping(config_file_options.conninfo);

	switch (status)
	{
		case PQPING_NO_ATTEMPT:
			log_error(_("unable to determine status of server"));
			exit(ERR_BAD_CONFIG);
		case PQPING_OK:
		case PQPING_REJECT:
			/* server responded, so it is definitely not shut down */
			is_shutdown = false;
			break;
		case PQPING_NO_RESPONSE:
			/* status not yet clear */
			break;
	}

	db_state = get_db_state(config_file_options.data_directory);

	if (is_shutdown == false)
	{
		log_error(_("database is still running in state \"%s\""),
				  describe_db_state(db_state));
		exit(ERR_BAD_CONFIG);
	}

	/* pg_rewind requires the target to have been shut down cleanly */
	if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
	{
		log_error(_("database is not shut down cleanly, pg_rewind will not be able to run"));
		exit(ERR_BAD_CONFIG);
	}

	/* XXX we can probably make this an internal function */
	do_node_archive_config();

	/* execute pg_rewind */
	initPQExpBuffer(&command);

	appendPQExpBuffer(
		&command,
		"%s -D ",
		make_pg_path("pg_rewind"));

	appendShellString(
		&command,
		config_file_options.data_directory);

	/*
	 * Quote the connection string with appendShellString(), same as the
	 * data directory above, so that a conninfo containing single quotes
	 * or other shell metacharacters cannot break the command line.
	 */
	appendPQExpBuffer(
		&command,
		" --source-server=");

	appendShellString(
		&command,
		runtime_options.upstream_conninfo);

	log_notice(_("executing pg_rewind"));
	log_debug("pg_rewind command is:\n %s",
			  command.data);

	initPQExpBuffer(&command_output);

	/*
	 * NOTE(review): assumes local_command() returns a boolean success
	 * indicator - confirm against its definition.
	 */
	rewind_ok = local_command(
		command.data,
		&command_output);

	termPQExpBuffer(&command);

	if (rewind_ok == false)
	{
		/*
		 * Don't carry on as if the rewind had succeeded; the archived
		 * configuration files are left in place for manual inspection.
		 */
		log_error(_("unable to execute pg_rewind"));

		if (command_output.data[0] != '\0')
			log_detail("%s", command_output.data);

		termPQExpBuffer(&command_output);
		exit(ERR_BAD_CONFIG);
	}

	termPQExpBuffer(&command_output);

	/* Restore any previously archived config files */
	do_node_restore_config();

	/* remove any recovery.done file copied in by pg_rewind */
	snprintf(filebuf, MAXPGPATH,
			 "%s/recovery.done",
			 config_file_options.data_directory);

	if (stat(filebuf, &statbuf) == 0)
	{
		log_verbose(LOG_INFO, _("deleting \"recovery.done\""));

		if (unlink(filebuf) == -1)
		{
			log_warning(_("unable to delete \"%s\""),
						filebuf);
			log_detail("%s", strerror(errno));
		}
	}
}
/*
* Intended mainly for "internal" use by `node switchover`, which
* calls this on the target server to archive any configuration files
@@ -919,7 +1037,7 @@ do_node_archive_config(void)
struct dirent *arcdir_ent;
DIR *arcdir;
PGconn *local_conn = NULL;
KeyValueList config_files = { NULL, NULL };
KeyValueListCell *cell;
int copied_count = 0;
@@ -988,8 +1106,8 @@ do_node_archive_config(void)
if (unlink(arcdir_ent_path) == -1)
{
log_error(_("unable to create temporary archive directory \"%s\""),
archive_dir);
log_error(_("unable to delete file in temporary archive directory"));
log_detail(_("file is: \"%s\""), arcdir_ent_path);
log_detail("%s", strerror(errno));
closedir(arcdir);
exit(ERR_BAD_CONFIG);
@@ -998,9 +1116,57 @@ do_node_archive_config(void)
closedir(arcdir);
local_conn = establish_db_connection(config_file_options.conninfo, true);
/*
* extract list of config files from --config-files
*/
{
int i = 0, j;
int config_file_len = strlen(runtime_options.config_files);
char filenamebuf[MAXLEN] = "";
char pathbuf[MAXPGPATH] = "";
for (j = 0; j < config_file_len; j++)
{
if (runtime_options.config_files[j] == ',')
{
int filename_len = j - i;
if (filename_len > MAXLEN)
filename_len = MAXLEN - 1;
strncpy(filenamebuf, runtime_options.config_files + i, filename_len);
filenamebuf[filename_len] = '\0';
snprintf(pathbuf, MAXPGPATH,
"%s/%s",
config_file_options.data_directory,
filenamebuf);
key_value_list_set(
&config_files,
filenamebuf,
pathbuf);
i = j + 1;
}
}
if (i < config_file_len)
{
strncpy(filenamebuf, runtime_options.config_files + i, config_file_len - i);
snprintf(pathbuf, MAXPGPATH,
"%s/%s",
config_file_options.data_directory,
filenamebuf);
key_value_list_set(
&config_files,
filenamebuf,
pathbuf);
}
}
get_datadir_configuration_files(local_conn, &config_files);
for (cell = config_files.head; cell; cell = cell->next)
{
@@ -1010,14 +1176,22 @@ do_node_archive_config(void)
"%s/%s",
archive_dir,
cell->key);
copy_file(cell->value, dest_file);
copied_count++;
if (stat(cell->value, &statbuf) == -1)
{
log_warning(_("specified file \"%s\" not found, skipping"),
cell->value);
}
else
{
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
cell->key, dest_file);
copy_file(cell->value, dest_file);
copied_count++;
}
}
PQfinish(local_conn);
log_verbose(LOG_INFO, _("%i files copied to %s"),
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
copied_count, archive_dir);
}
@@ -1077,10 +1251,11 @@ do_node_restore_config(void)
snprintf(dest_file_path, MAXPGPATH,
"%s/%s",
runtime_options.data_dir,
config_file_options.data_directory,
arcdir_ent->d_name);
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"", src_file_path, dest_file_path);
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
src_file_path, dest_file_path);
if (copy_file(src_file_path, dest_file_path) == false)
{
@@ -1097,27 +1272,29 @@ do_node_restore_config(void)
}
closedir(arcdir);
log_notice(_("%i files copied to %s"),
copied_count,
config_file_options.data_directory);
if (copy_ok == false)
{
log_error(_("unable to copy all files from %s"), archive_dir);
log_error(_("unable to copy all files from \"%s\""), archive_dir);
exit(ERR_BAD_CONFIG);
}
log_notice(_("%i files copied to %s"), copied_count, runtime_options.data_dir);
/*
* Finally, delete directory - it should be empty unless it's been interfered
* with for some reason, in which case manual intervention is required
*/
if (rmdir(archive_dir) != 0 && errno != EEXIST)
{
log_warning(_("unable to delete %s"), archive_dir);
log_detail(_("directory may need to be manually removed"));
log_warning(_("unable to delete directory \"%s\""), archive_dir);
log_detail("%s", strerror(errno));
log_hint(_("directory may need to be manually removed"));
}
else
{
log_verbose(LOG_NOTICE, "directory %s deleted", archive_dir);
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir);
}
return;

View File

@@ -11,7 +11,7 @@ extern void do_node_check(void);
extern CheckStatus do_node_check_archiver(PGconn *conn, OutputMode mode, PQExpBufferData *output);
extern CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, PQExpBufferData *output);
extern void do_node_rejoin(void);
extern void do_node_archive_config(void);
extern void do_node_restore_config(void);
extern void do_node_service(void);

View File

@@ -1598,6 +1598,8 @@ do_standby_switchover(void)
XLogRecPtr remote_last_checkpoint_lsn = InvalidXLogRecPtr;
ReplInfo replication_info = T_REPLINFO_INTIALIZER;
/* store list of configuration files on the demotion candidate */
KeyValueList remote_config_files = { NULL, NULL };
/*
* SANITY CHECKS
@@ -1701,7 +1703,8 @@ do_standby_switchover(void)
/*
* If --force-rewind specified, check pg_rewind can be used
* If --force-rewind specified, check pg_rewind can be used, and pre-emptively
* fetch the list of configuration files which should be archived
*/
if (runtime_options.force_rewind == true)
@@ -1722,6 +1725,8 @@ do_standby_switchover(void)
}
termPQExpBuffer(&reason);
get_datadir_configuration_files(remote_conn, &remote_config_files);
}
@@ -2153,33 +2158,72 @@ do_standby_switchover(void)
}
/* promote standby */
_do_standby_promote_internal(config_file_options.data_directory);
if (replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
if (1 || replication_info.last_wal_receive_lsn < remote_last_checkpoint_lsn)
{
// if --force-rewind was supplied, do that now, otherwise exit
KeyValueListCell *cell;
bool first_entry = true;
if (runtime_options.force_rewind == false)
{
log_error(_("new primary diverges from former primary and --force-rewind not provided"));
/* TODO: "repmgr node rejoin" example, when available */
log_hint(_("the former primary will need to be restored manually"));
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
}
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str);
appendPQExpBuffer(&remote_command_str,
"node rejoin --upstream-conninfo='%s'",
local_node_record.conninfo);
appendPQExpBuffer(&remote_command_str,
" --config-files=");
for (cell = remote_config_files.head; cell; cell = cell->next)
{
if (first_entry == false)
appendPQExpBuffer(&remote_command_str, ",");
else
first_entry = false;
appendPQExpBuffer(&remote_command_str, "%s", cell->key);
}
log_debug("executing:\n \"%s\"", remote_command_str.data);
(void)remote_command(
remote_host,
runtime_options.remote_user,
remote_command_str.data,
NULL);
termPQExpBuffer(&remote_command_str);
}
else
{
/*
* Execute `repmgr standby follow` to create recovery.conf and start
* the remote server
*
* XXX replace with "node rejoin"
*/
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str);
appendPQExpBuffer(&remote_command_str,
" -d \\'%s\\' standby follow",
local_node_record.conninfo);
log_debug("executing:\n \"%s\"", remote_command_str.data);
(void)remote_command(
remote_host,
runtime_options.remote_user,
remote_command_str.data,
NULL);
/*
* Execute `repmgr standby follow` to create recovery.conf and start
* the remote server
*
* XXX replace with "node rejoin"
*/
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str);
appendPQExpBuffer(&remote_command_str,
" -d \\'%s\\' standby follow",
local_node_record.conninfo);
log_debug("executing:\n \"%s\"", remote_command_str.data);
(void)remote_command(
remote_host,
runtime_options.remote_user,
remote_command_str.data,
NULL);
termPQExpBuffer(&remote_command_str);
termPQExpBuffer(&remote_command_str);
}
/* TODO: verify this node's record was updated correctly */

View File

@@ -94,6 +94,9 @@ typedef struct
bool archiver;
bool replication_lag;
/* "node join" options */
char config_files[MAXLEN];
/* "node service" options */
char action[MAXLEN];
bool check;
@@ -136,6 +139,8 @@ typedef struct
false, \
/* "node check" options */ \
false, false, \
/* "node join" options */ \
"", \
/* "node service" options */ \
"", false, false, false, \
/* "cluster event" options */ \

View File

@@ -30,6 +30,7 @@
* NODE CHECK
*
* For internal use:
* NODE REJOIN
* NODE ARCHIVE-CONFIG
* NODE RESTORE-CONFIG
* NODE SERVICE
@@ -431,6 +432,12 @@ main(int argc, char **argv)
runtime_options.replication_lag = true;
break;
/* "node join" options *
* ------------------- */
case OPT_CONFIG_FILES:
strncpy(runtime_options.config_files, optarg, MAXLEN);
break;
/* "node service" options *
* ---------------------- */
@@ -644,7 +651,7 @@ main(int argc, char **argv)
* { PRIMARY | MASTER } REGISTER |
* STANDBY {REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER | REWIND} |
* BDR { REGISTER | UNREGISTER } |
* NODE { STATUS | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } |
* NODE { STATUS | CHECK | REJOIN | ARCHIVE-CONFIG | RESTORE-CONFIG | SERVICE } |
* CLUSTER { CROSSCHECK | MATRIX | SHOW | CLEANUP | EVENT }
*
* [node] is an optional hostname, provided instead of the -h/--host optipn
@@ -718,6 +725,8 @@ main(int argc, char **argv)
action = NODE_CHECK;
else if (strcasecmp(repmgr_action, "STATUS") == 0)
action = NODE_STATUS;
else if (strcasecmp(repmgr_action, "REJOIN") == 0)
action = NODE_REJOIN;
else if (strcasecmp(repmgr_action, "ARCHIVE-CONFIG") == 0)
action = NODE_ARCHIVE_CONFIG;
else if (strcasecmp(repmgr_action, "RESTORE-CONFIG") == 0)
@@ -1046,6 +1055,9 @@ main(int argc, char **argv)
case NODE_CHECK:
do_node_check();
break;
case NODE_REJOIN:
do_node_rejoin();
break;
case NODE_ARCHIVE_CONFIG:
do_node_archive_config();
break;
@@ -1378,9 +1390,10 @@ check_cli_parameters(const int action)
case NODE_STATUS:
break;
default:
item_list_append_format(&cli_warnings,
_("--is-shutdown will be ignored when executing %s"),
action_name(action));
item_list_append_format(
&cli_warnings,
_("--is-shutdown will be ignored when executing %s"),
action_name(action));
}
}
@@ -1391,9 +1404,10 @@ check_cli_parameters(const int action)
case STANDBY_SWITCHOVER:
break;
default:
item_list_append_format(&cli_warnings,
item_list_append_format(
&cli_warnings,
_("--always-promote will be ignored when executing %s"),
action_name(action));
action_name(action));
}
}
@@ -1404,9 +1418,25 @@ check_cli_parameters(const int action)
case STANDBY_SWITCHOVER:
break;
default:
item_list_append_format(&cli_warnings,
item_list_append_format(
&cli_warnings,
_("--force-rewind will be ignored when executing %s"),
action_name(action));
action_name(action));
}
}
if (runtime_options.config_files[0] != '\0')
{
switch (action)
{
case NODE_REJOIN:
break;
default:
item_list_append_format(
&cli_warnings,
_("--config-files will be ignored when executing %s"),
action_name(action));
}
}
@@ -1426,8 +1456,9 @@ check_cli_parameters(const int action)
if (used_options > 1)
{
/* TODO: list which options were used */
item_list_append(&cli_errors,
"only one of --csv, --nagios and --optformat can be used");
item_list_append(
&cli_errors,
"only one of --csv, --nagios and --optformat can be used");
}
}
}
@@ -1463,6 +1494,8 @@ action_name(const int action)
return "NODE STATUS";
case NODE_CHECK:
return "NODE CHECK";
case NODE_REJOIN:
return "NODE REJOIN";
case NODE_ARCHIVE_CONFIG:
return "NODE ARCHIVE-CONFIG";
case NODE_RESTORE_CONFIG:
@@ -2849,6 +2882,7 @@ get_server_action(t_server_action action, char *script, char *data_dir)
return;
}
bool
data_dir_required_for_action(t_server_action action)
{

View File

@@ -25,13 +25,14 @@
#define NODE_STATUS 11
#define NODE_CHECK 12
#define NODE_SERVICE 13
#define NODE_ARCHIVE_CONFIG 14
#define NODE_RESTORE_CONFIG 15
#define CLUSTER_SHOW 16
#define CLUSTER_CLEANUP 17
#define CLUSTER_MATRIX 18
#define CLUSTER_CROSSCHECK 19
#define CLUSTER_EVENT 20
#define NODE_REJOIN 14
#define NODE_ARCHIVE_CONFIG 15
#define NODE_RESTORE_CONFIG 16
#define CLUSTER_SHOW 17
#define CLUSTER_CLEANUP 18
#define CLUSTER_MATRIX 19
#define CLUSTER_CROSSCHECK 20
#define CLUSTER_EVENT 21
/* command line options without short versions */
#define OPT_HELP 1001
@@ -69,6 +70,7 @@
#define OPT_ARCHIVER 1032
#define OPT_OPTFORMAT 1033
#define OPT_REPLICATION_LAG 1034
#define OPT_CONFIG_FILES 1035
/* deprecated since 3.3 */
#define OPT_DATA_DIR 999
#define OPT_NO_CONNINFO_PASSWORD 998
@@ -138,6 +140,9 @@ static struct option long_options[] =
{"archiver", no_argument, NULL, OPT_ARCHIVER },
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG },
/* "node join" options */
{"config-files", required_argument, NULL, OPT_CONFIG_FILES },
/* "node service" options */
{"action", required_argument, NULL, OPT_ACTION},
{"check", no_argument, NULL, OPT_CHECK},