mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
After switchover, enable sibling standbys to follow new primary
This commit is contained in:
12
README.md
12
README.md
@@ -37,11 +37,13 @@ The following commands are available:
|
||||
repmgr standby unregister
|
||||
repmgr standby promote
|
||||
repmgr standby follow
|
||||
repmgr standby switchover
|
||||
|
||||
repmgr bdr register
|
||||
repmgr bdr unregister
|
||||
|
||||
repmgr node status
|
||||
repmgr node check
|
||||
|
||||
repmgr cluster show
|
||||
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
||||
@@ -55,6 +57,16 @@ The following commands are available:
|
||||
|
||||
`master register` can be used as an alias for `primary register`.
|
||||
|
||||
* `standby switchover`
|
||||
|
||||
...
|
||||
|
||||
If other standbys (siblings of the promotion candidate) are connected
|
||||
to the demotion candidate and `--siblings-follow` is specified, `repmgr`
|
||||
can instruct these to follow the new primary. Note this can only work
|
||||
if the configuration file on each sibling is at the same path as specified
|
||||
in -f/--config-file or -C/--remote-config-file.
|
||||
|
||||
* `cluster show`
|
||||
|
||||
Displays information about each active node in the replication cluster. This
|
||||
|
||||
@@ -72,6 +72,7 @@ typedef enum {
|
||||
*/
|
||||
typedef struct s_node_info
|
||||
{
|
||||
/* contents of "repmgr.nodes" */
|
||||
int node_id;
|
||||
int upstream_node_id;
|
||||
t_server_type type;
|
||||
@@ -91,6 +92,7 @@ typedef struct s_node_info
|
||||
PGconn *conn;
|
||||
/* for ad-hoc use e.g. when working with a list of nodes */
|
||||
char details[MAXLEN];
|
||||
bool reachable;
|
||||
/* various statistics */
|
||||
int max_wal_senders;
|
||||
int attached_wal_receivers;
|
||||
@@ -101,6 +103,7 @@ typedef struct s_node_info
|
||||
|
||||
|
||||
#define T_NODE_INFO_INITIALIZER { \
|
||||
/* contents of "repmgr.nodes" */ \
|
||||
NODE_NOT_FOUND, \
|
||||
NO_UPSTREAM_NODE, \
|
||||
UNKNOWN, \
|
||||
@@ -112,12 +115,15 @@ typedef struct s_node_info
|
||||
DEFAULT_PRIORITY, \
|
||||
true, \
|
||||
"", \
|
||||
/* used during failover to track node status */ \
|
||||
InvalidXLogRecPtr, \
|
||||
NODE_STATUS_UNKNOWN, \
|
||||
RECTYPE_UNKNOWN, \
|
||||
MS_NORMAL, \
|
||||
NULL, \
|
||||
"", \
|
||||
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
||||
"", true \
|
||||
/* various statistics */ \
|
||||
-1, -1, -1, -1, -1 \
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,6 @@ do_node_status(void)
|
||||
return _do_node_status_is_shutdown();
|
||||
}
|
||||
|
||||
|
||||
if (strlen(config_file_options.conninfo))
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
else
|
||||
@@ -979,7 +978,9 @@ do_node_rejoin(void)
|
||||
|
||||
|
||||
/* check provided upstream connection */
|
||||
upstream_conn = establish_db_connection(runtime_options.upstream_conninfo, true);
|
||||
upstream_conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||
|
||||
/* establish_db_connection(runtime_options.upstream_conninfo, true); */
|
||||
|
||||
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
||||
{
|
||||
@@ -1030,7 +1031,7 @@ do_node_rejoin(void)
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
" --source-server='%s'",
|
||||
runtime_options.upstream_conninfo);
|
||||
primary_node_record.conninfo);
|
||||
|
||||
log_notice(_("executing pg_rewind"));
|
||||
log_debug("pg_rewind command is:\n %s",
|
||||
|
||||
@@ -1360,8 +1360,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Initialise connection parameters to write as `primary_conninfo` */
|
||||
initialize_conninfo_params(&recovery_conninfo, false);
|
||||
|
||||
@@ -1540,17 +1538,15 @@ do_standby_switchover(void)
|
||||
PGconn *local_conn;
|
||||
PGconn *remote_conn;
|
||||
|
||||
|
||||
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
|
||||
/* the remote server is the primary to be demoted */
|
||||
char remote_conninfo[MAXCONNINFO] = "";
|
||||
char remote_host[MAXLEN] = "";
|
||||
int remote_node_id;
|
||||
t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
RecordStatus record_status;
|
||||
RecordStatus record_status;
|
||||
RecoveryType recovery_type;
|
||||
PQExpBufferData remote_command_str;
|
||||
PQExpBufferData command_output;
|
||||
@@ -1565,6 +1561,10 @@ do_standby_switchover(void)
|
||||
/* store list of configuration files on the demotion candidate */
|
||||
KeyValueList remote_config_files = { NULL, NULL };
|
||||
|
||||
/* store list of sibling nodes if --siblings-follow specified */
|
||||
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
int unreachable_sibling_node_count = 0;
|
||||
|
||||
/*
|
||||
* SANITY CHECKS
|
||||
*
|
||||
@@ -1847,9 +1847,6 @@ do_standby_switchover(void)
|
||||
}
|
||||
|
||||
PQfinish(remote_conn);
|
||||
PQfinish(local_conn);
|
||||
|
||||
|
||||
|
||||
/* Determine the remote's configuration file location */
|
||||
/* -------------------------------------------------- */
|
||||
@@ -1884,6 +1881,7 @@ do_standby_switchover(void)
|
||||
log_error(_("unable to find the specified repmgr configuration file on remote server"));
|
||||
log_detail(_("remote configuration file is \"%s\""),
|
||||
runtime_options.remote_config_file);
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -1952,10 +1950,82 @@ do_standby_switchover(void)
|
||||
{
|
||||
log_error(_("no remote configuration file supplied or found in a default location - terminating"));
|
||||
log_hint(_("specify the remote configuration file with -C/--remote-config-file"));
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If --siblings-follow specified, get list and check they're reachable
|
||||
*/
|
||||
|
||||
if (runtime_options.siblings_follow == true)
|
||||
{
|
||||
char host[MAXLEN] = "";
|
||||
NodeInfoListCell *cell;
|
||||
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_record.node_id,
|
||||
local_node_record.upstream_node_id,
|
||||
&sibling_nodes);
|
||||
|
||||
log_verbose(LOG_INFO, _("%i active sibling nodes found"),
|
||||
sibling_nodes.node_count);
|
||||
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* get host from node record */
|
||||
get_conninfo_value(cell->node_info->conninfo, "host", host);
|
||||
r = test_ssh_connection(host, runtime_options.remote_user);
|
||||
|
||||
if (r != 0)
|
||||
{
|
||||
cell->node_info->reachable = false;
|
||||
unreachable_sibling_node_count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cell->node_info->reachable = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (unreachable_sibling_node_count > 0)
|
||||
{
|
||||
if (runtime_options.force == false)
|
||||
{
|
||||
log_error(_("%i of %i sibling nodes unreachable via SSH:"),
|
||||
unreachable_sibling_node_count,
|
||||
sibling_nodes.node_count);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("%i of %i sibling nodes unreachable via SSH:"),
|
||||
unreachable_sibling_node_count,
|
||||
sibling_nodes.node_count);
|
||||
}
|
||||
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
if (cell->node_info->reachable == true)
|
||||
continue;
|
||||
log_detail(" %s (ID: %i)",
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id);
|
||||
}
|
||||
|
||||
if (runtime_options.force == false)
|
||||
{
|
||||
log_hint(_("use -F/--force to proceed in any case"));
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
|
||||
log_detail(_("F/--force specified, proceeding anyway"));
|
||||
}
|
||||
}
|
||||
PQfinish(local_conn);
|
||||
|
||||
|
||||
/*
|
||||
* Sanity checks completed - prepare for the switchover
|
||||
@@ -2165,7 +2235,7 @@ do_standby_switchover(void)
|
||||
make_remote_repmgr_path(&remote_command_str);
|
||||
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"%s--upstream-conninfo=\\'%s\\' node rejoin",
|
||||
"%s-d \\'%s\\' node rejoin",
|
||||
node_rejoin_options.data,
|
||||
local_node_record.conninfo);
|
||||
|
||||
@@ -2218,6 +2288,57 @@ do_standby_switchover(void)
|
||||
log_detail(_("node \"%s\" is now primary"),
|
||||
local_node_record.node_name);
|
||||
|
||||
/*
|
||||
* If --siblings-follow specified, attempt to make them follow the
|
||||
* new primary
|
||||
*/
|
||||
|
||||
if (runtime_options.siblings_follow == true)
|
||||
{
|
||||
int failed_follow_count = 0;
|
||||
char host[MAXLEN] = "";
|
||||
NodeInfoListCell *cell;
|
||||
log_notice(_("executing STANDBY FOLLOW on %i of %i siblings"),
|
||||
sibling_nodes.node_count - unreachable_sibling_node_count,
|
||||
sibling_nodes.node_count);
|
||||
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
int r = 0;
|
||||
log_debug("XXX %s", cell->node_info->node_name);
|
||||
/* skip nodes previously determined as unreachable */
|
||||
if (cell->node_info->reachable == false)
|
||||
{
|
||||
log_debug(" XXX unreachable!");
|
||||
continue;
|
||||
}
|
||||
|
||||
initPQExpBuffer(&remote_command_str);
|
||||
make_remote_repmgr_path(&remote_command_str);
|
||||
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"standby follow");
|
||||
get_conninfo_value(cell->node_info->conninfo, "host", host);
|
||||
log_debug("executing:\n \"%s\"", remote_command_str.data);
|
||||
r = remote_command(
|
||||
host,
|
||||
runtime_options.remote_user,
|
||||
remote_command_str.data,
|
||||
NULL);
|
||||
if (r != 0)
|
||||
{
|
||||
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
|
||||
cell->node_info->node_name);
|
||||
failed_follow_count++;
|
||||
}
|
||||
termPQExpBuffer(&remote_command_str);
|
||||
}
|
||||
|
||||
if (failed_follow_count == 0)
|
||||
{
|
||||
log_info(_("STANDBY FOLLOW"));
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3381,7 +3502,7 @@ copy_configuration_files(void)
|
||||
{
|
||||
int i, r;
|
||||
t_configfile_info *file;
|
||||
char *host;
|
||||
char *host = NULL;
|
||||
|
||||
/* get host from upstream record */
|
||||
host = param_get(&recovery_conninfo, "host");
|
||||
|
||||
@@ -86,6 +86,7 @@ typedef struct
|
||||
char remote_config_file[MAXPGPATH];
|
||||
bool always_promote;
|
||||
bool force_rewind;
|
||||
bool siblings_follow;
|
||||
|
||||
/* "node status" options */
|
||||
bool is_shutdown;
|
||||
@@ -134,7 +135,7 @@ typedef struct
|
||||
/* "standby register" options */ \
|
||||
false, 0, \
|
||||
/* "standby switchover" options */ \
|
||||
"", false, false, \
|
||||
"", false, false, false, \
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
|
||||
@@ -401,6 +401,7 @@ main(int argc, char **argv)
|
||||
/* "standby switchover" options *
|
||||
* ---------------------------- */
|
||||
|
||||
/* -C/--remote-config-file */
|
||||
case 'C':
|
||||
strncpy(runtime_options.remote_config_file, optarg, MAXPGPATH);
|
||||
break;
|
||||
@@ -413,6 +414,10 @@ main(int argc, char **argv)
|
||||
runtime_options.force_rewind = true;
|
||||
break;
|
||||
|
||||
case OPT_SIBLINGS_FOLLOW:
|
||||
runtime_options.siblings_follow = true;
|
||||
break;
|
||||
|
||||
/* "node status" options *
|
||||
* --------------------- */
|
||||
|
||||
@@ -1178,11 +1183,11 @@ check_cli_parameters(const int action)
|
||||
break;
|
||||
|
||||
case NODE_REJOIN:
|
||||
if (runtime_options.upstream_conninfo[0] == '\0')
|
||||
if (runtime_options.connection_param_provided == false)
|
||||
{
|
||||
item_list_append(
|
||||
&cli_errors,
|
||||
"--upstream-conninfo must be provided with NODE REJOIN");
|
||||
"database connection parameters for an available node must be provided when executing NODE REJOIN");
|
||||
}
|
||||
break;
|
||||
case CLUSTER_SHOW:
|
||||
|
||||
@@ -69,6 +69,7 @@
|
||||
#define OPT_OPTFORMAT 1033
|
||||
#define OPT_REPLICATION_LAG 1034
|
||||
#define OPT_CONFIG_FILES 1035
|
||||
#define OPT_SIBLINGS_FOLLOW 1036
|
||||
/* deprecated since 3.3 */
|
||||
#define OPT_DATA_DIR 999
|
||||
#define OPT_NO_CONNINFO_PASSWORD 998
|
||||
@@ -130,6 +131,7 @@ static struct option long_options[] =
|
||||
{"remote-config-file", required_argument, NULL, 'C'},
|
||||
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE },
|
||||
{"force-rewind", no_argument, NULL, OPT_FORCE_REWIND },
|
||||
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW },
|
||||
|
||||
/* "node status" options */
|
||||
{"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN },
|
||||
|
||||
Reference in New Issue
Block a user