mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
After switchover, enable sibling standbys to follow new primary
This commit is contained in:
12
README.md
12
README.md
@@ -37,11 +37,13 @@ The following commands are available:
|
|||||||
repmgr standby unregister
|
repmgr standby unregister
|
||||||
repmgr standby promote
|
repmgr standby promote
|
||||||
repmgr standby follow
|
repmgr standby follow
|
||||||
|
repmgr standby switchover
|
||||||
|
|
||||||
repmgr bdr register
|
repmgr bdr register
|
||||||
repmgr bdr unregister
|
repmgr bdr unregister
|
||||||
|
|
||||||
repmgr node status
|
repmgr node status
|
||||||
|
repmgr node check
|
||||||
|
|
||||||
repmgr cluster show
|
repmgr cluster show
|
||||||
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
repmgr cluster event [--all] [--node-id] [--node-name] [--event] [--event-matching]
|
||||||
@@ -55,6 +57,16 @@ The following commands are available:
|
|||||||
|
|
||||||
`master register` can be used as an alias for `primary register`.
|
`master register` can be used as an alias for `primary register`.
|
||||||
|
|
||||||
|
* `standby switchover`
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
If other standbys (siblings of the promotion candidate) are connected
|
||||||
|
to the demotion candidate and `--siblings-follow` is specified, `repmgr`
|
||||||
|
can instruct these to follow the new primary. Note this can only work
|
||||||
|
if the configuration file on each sibling is at the same path as specified
|
||||||
|
in -f/--config-file or -C/--remote-config-file.
|
||||||
|
|
||||||
* `cluster show`
|
* `cluster show`
|
||||||
|
|
||||||
Displays information about each active node in the replication cluster. This
|
Displays information about each active node in the replication cluster. This
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ typedef enum {
|
|||||||
*/
|
*/
|
||||||
typedef struct s_node_info
|
typedef struct s_node_info
|
||||||
{
|
{
|
||||||
|
/* contents of "repmgr.nodes" */
|
||||||
int node_id;
|
int node_id;
|
||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
t_server_type type;
|
t_server_type type;
|
||||||
@@ -91,6 +92,7 @@ typedef struct s_node_info
|
|||||||
PGconn *conn;
|
PGconn *conn;
|
||||||
/* for ad-hoc use e.g. when working with a list of nodes */
|
/* for ad-hoc use e.g. when working with a list of nodes */
|
||||||
char details[MAXLEN];
|
char details[MAXLEN];
|
||||||
|
bool reachable;
|
||||||
/* various statistics */
|
/* various statistics */
|
||||||
int max_wal_senders;
|
int max_wal_senders;
|
||||||
int attached_wal_receivers;
|
int attached_wal_receivers;
|
||||||
@@ -101,6 +103,7 @@ typedef struct s_node_info
|
|||||||
|
|
||||||
|
|
||||||
#define T_NODE_INFO_INITIALIZER { \
|
#define T_NODE_INFO_INITIALIZER { \
|
||||||
|
/* contents of "repmgr.nodes" */ \
|
||||||
NODE_NOT_FOUND, \
|
NODE_NOT_FOUND, \
|
||||||
NO_UPSTREAM_NODE, \
|
NO_UPSTREAM_NODE, \
|
||||||
UNKNOWN, \
|
UNKNOWN, \
|
||||||
@@ -112,12 +115,15 @@ typedef struct s_node_info
|
|||||||
DEFAULT_PRIORITY, \
|
DEFAULT_PRIORITY, \
|
||||||
true, \
|
true, \
|
||||||
"", \
|
"", \
|
||||||
|
/* used during failover to track node status */ \
|
||||||
InvalidXLogRecPtr, \
|
InvalidXLogRecPtr, \
|
||||||
NODE_STATUS_UNKNOWN, \
|
NODE_STATUS_UNKNOWN, \
|
||||||
RECTYPE_UNKNOWN, \
|
RECTYPE_UNKNOWN, \
|
||||||
MS_NORMAL, \
|
MS_NORMAL, \
|
||||||
NULL, \
|
NULL, \
|
||||||
"", \
|
/* for ad-hoc use e.g. when working with a list of nodes */ \
|
||||||
|
"", true \
|
||||||
|
/* various statistics */ \
|
||||||
-1, -1, -1, -1, -1 \
|
-1, -1, -1, -1, -1 \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -52,7 +52,6 @@ do_node_status(void)
|
|||||||
return _do_node_status_is_shutdown();
|
return _do_node_status_is_shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (strlen(config_file_options.conninfo))
|
if (strlen(config_file_options.conninfo))
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
else
|
else
|
||||||
@@ -979,7 +978,9 @@ do_node_rejoin(void)
|
|||||||
|
|
||||||
|
|
||||||
/* check provided upstream connection */
|
/* check provided upstream connection */
|
||||||
upstream_conn = establish_db_connection(runtime_options.upstream_conninfo, true);
|
upstream_conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||||
|
|
||||||
|
/* establish_db_connection(runtime_options.upstream_conninfo, true); */
|
||||||
|
|
||||||
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
||||||
{
|
{
|
||||||
@@ -1030,7 +1031,7 @@ do_node_rejoin(void)
|
|||||||
appendPQExpBuffer(
|
appendPQExpBuffer(
|
||||||
&command,
|
&command,
|
||||||
" --source-server='%s'",
|
" --source-server='%s'",
|
||||||
runtime_options.upstream_conninfo);
|
primary_node_record.conninfo);
|
||||||
|
|
||||||
log_notice(_("executing pg_rewind"));
|
log_notice(_("executing pg_rewind"));
|
||||||
log_debug("pg_rewind command is:\n %s",
|
log_debug("pg_rewind command is:\n %s",
|
||||||
|
|||||||
@@ -1360,8 +1360,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Initialise connection parameters to write as `primary_conninfo` */
|
/* Initialise connection parameters to write as `primary_conninfo` */
|
||||||
initialize_conninfo_params(&recovery_conninfo, false);
|
initialize_conninfo_params(&recovery_conninfo, false);
|
||||||
|
|
||||||
@@ -1540,17 +1538,15 @@ do_standby_switchover(void)
|
|||||||
PGconn *local_conn;
|
PGconn *local_conn;
|
||||||
PGconn *remote_conn;
|
PGconn *remote_conn;
|
||||||
|
|
||||||
|
|
||||||
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
|
||||||
/* the remote server is the primary to be demoted */
|
/* the remote server is the primary to be demoted */
|
||||||
char remote_conninfo[MAXCONNINFO] = "";
|
char remote_conninfo[MAXCONNINFO] = "";
|
||||||
char remote_host[MAXLEN] = "";
|
char remote_host[MAXLEN] = "";
|
||||||
int remote_node_id;
|
int remote_node_id;
|
||||||
t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info remote_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
RecordStatus record_status;
|
RecordStatus record_status;
|
||||||
RecoveryType recovery_type;
|
RecoveryType recovery_type;
|
||||||
PQExpBufferData remote_command_str;
|
PQExpBufferData remote_command_str;
|
||||||
PQExpBufferData command_output;
|
PQExpBufferData command_output;
|
||||||
@@ -1565,6 +1561,10 @@ do_standby_switchover(void)
|
|||||||
/* store list of configuration files on the demotion candidate */
|
/* store list of configuration files on the demotion candidate */
|
||||||
KeyValueList remote_config_files = { NULL, NULL };
|
KeyValueList remote_config_files = { NULL, NULL };
|
||||||
|
|
||||||
|
/* store list of sibling nodes if --siblings-follow specified */
|
||||||
|
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
|
int unreachable_sibling_node_count = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SANITY CHECKS
|
* SANITY CHECKS
|
||||||
*
|
*
|
||||||
@@ -1847,9 +1847,6 @@ do_standby_switchover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(remote_conn);
|
PQfinish(remote_conn);
|
||||||
PQfinish(local_conn);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Determine the remote's configuration file location */
|
/* Determine the remote's configuration file location */
|
||||||
/* -------------------------------------------------- */
|
/* -------------------------------------------------- */
|
||||||
@@ -1884,6 +1881,7 @@ do_standby_switchover(void)
|
|||||||
log_error(_("unable to find the specified repmgr configuration file on remote server"));
|
log_error(_("unable to find the specified repmgr configuration file on remote server"));
|
||||||
log_detail(_("remote configuration file is \"%s\""),
|
log_detail(_("remote configuration file is \"%s\""),
|
||||||
runtime_options.remote_config_file);
|
runtime_options.remote_config_file);
|
||||||
|
PQfinish(local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1952,10 +1950,82 @@ do_standby_switchover(void)
|
|||||||
{
|
{
|
||||||
log_error(_("no remote configuration file supplied or found in a default location - terminating"));
|
log_error(_("no remote configuration file supplied or found in a default location - terminating"));
|
||||||
log_hint(_("specify the remote configuration file with -C/--remote-config-file"));
|
log_hint(_("specify the remote configuration file with -C/--remote-config-file"));
|
||||||
|
PQfinish(local_conn);
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If --siblings-follow specified, get list and check they're reachable
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (runtime_options.siblings_follow == true)
|
||||||
|
{
|
||||||
|
char host[MAXLEN] = "";
|
||||||
|
NodeInfoListCell *cell;
|
||||||
|
|
||||||
|
get_active_sibling_node_records(local_conn,
|
||||||
|
local_node_record.node_id,
|
||||||
|
local_node_record.upstream_node_id,
|
||||||
|
&sibling_nodes);
|
||||||
|
|
||||||
|
log_verbose(LOG_INFO, _("%i active sibling nodes found"),
|
||||||
|
sibling_nodes.node_count);
|
||||||
|
|
||||||
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
/* get host from node record */
|
||||||
|
get_conninfo_value(cell->node_info->conninfo, "host", host);
|
||||||
|
r = test_ssh_connection(host, runtime_options.remote_user);
|
||||||
|
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
cell->node_info->reachable = false;
|
||||||
|
unreachable_sibling_node_count++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cell->node_info->reachable = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unreachable_sibling_node_count > 0)
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false)
|
||||||
|
{
|
||||||
|
log_error(_("%i of %i sibling nodes unreachable via SSH:"),
|
||||||
|
unreachable_sibling_node_count,
|
||||||
|
sibling_nodes.node_count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("%i of %i sibling nodes unreachable via SSH:"),
|
||||||
|
unreachable_sibling_node_count,
|
||||||
|
sibling_nodes.node_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
if (cell->node_info->reachable == true)
|
||||||
|
continue;
|
||||||
|
log_detail(" %s (ID: %i)",
|
||||||
|
cell->node_info->node_name,
|
||||||
|
cell->node_info->node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.force == false)
|
||||||
|
{
|
||||||
|
log_hint(_("use -F/--force to proceed in any case"));
|
||||||
|
PQfinish(local_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
log_detail(_("F/--force specified, proceeding anyway"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sanity checks completed - prepare for the switchover
|
* Sanity checks completed - prepare for the switchover
|
||||||
@@ -2165,7 +2235,7 @@ do_standby_switchover(void)
|
|||||||
make_remote_repmgr_path(&remote_command_str);
|
make_remote_repmgr_path(&remote_command_str);
|
||||||
|
|
||||||
appendPQExpBuffer(&remote_command_str,
|
appendPQExpBuffer(&remote_command_str,
|
||||||
"%s--upstream-conninfo=\\'%s\\' node rejoin",
|
"%s-d \\'%s\\' node rejoin",
|
||||||
node_rejoin_options.data,
|
node_rejoin_options.data,
|
||||||
local_node_record.conninfo);
|
local_node_record.conninfo);
|
||||||
|
|
||||||
@@ -2218,6 +2288,57 @@ do_standby_switchover(void)
|
|||||||
log_detail(_("node \"%s\" is now primary"),
|
log_detail(_("node \"%s\" is now primary"),
|
||||||
local_node_record.node_name);
|
local_node_record.node_name);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If --siblings-follow specified, attempt to make them follow the
|
||||||
|
* new primary
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (runtime_options.siblings_follow == true)
|
||||||
|
{
|
||||||
|
int failed_follow_count = 0;
|
||||||
|
char host[MAXLEN] = "";
|
||||||
|
NodeInfoListCell *cell;
|
||||||
|
log_notice(_("executing STANDBY FOLLOW on %i of %i siblings"),
|
||||||
|
sibling_nodes.node_count - unreachable_sibling_node_count,
|
||||||
|
sibling_nodes.node_count);
|
||||||
|
|
||||||
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
|
{
|
||||||
|
int r = 0;
|
||||||
|
log_debug("XXX %s", cell->node_info->node_name);
|
||||||
|
/* skip nodes previously determined as unreachable */
|
||||||
|
if (cell->node_info->reachable == false)
|
||||||
|
{
|
||||||
|
log_debug(" XXX unreachable!");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
initPQExpBuffer(&remote_command_str);
|
||||||
|
make_remote_repmgr_path(&remote_command_str);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&remote_command_str,
|
||||||
|
"standby follow");
|
||||||
|
get_conninfo_value(cell->node_info->conninfo, "host", host);
|
||||||
|
log_debug("executing:\n \"%s\"", remote_command_str.data);
|
||||||
|
r = remote_command(
|
||||||
|
host,
|
||||||
|
runtime_options.remote_user,
|
||||||
|
remote_command_str.data,
|
||||||
|
NULL);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
|
||||||
|
cell->node_info->node_name);
|
||||||
|
failed_follow_count++;
|
||||||
|
}
|
||||||
|
termPQExpBuffer(&remote_command_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (failed_follow_count == 0)
|
||||||
|
{
|
||||||
|
log_info(_("STANDBY FOLLOW"));
|
||||||
|
}
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3381,7 +3502,7 @@ copy_configuration_files(void)
|
|||||||
{
|
{
|
||||||
int i, r;
|
int i, r;
|
||||||
t_configfile_info *file;
|
t_configfile_info *file;
|
||||||
char *host;
|
char *host = NULL;
|
||||||
|
|
||||||
/* get host from upstream record */
|
/* get host from upstream record */
|
||||||
host = param_get(&recovery_conninfo, "host");
|
host = param_get(&recovery_conninfo, "host");
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ typedef struct
|
|||||||
char remote_config_file[MAXPGPATH];
|
char remote_config_file[MAXPGPATH];
|
||||||
bool always_promote;
|
bool always_promote;
|
||||||
bool force_rewind;
|
bool force_rewind;
|
||||||
|
bool siblings_follow;
|
||||||
|
|
||||||
/* "node status" options */
|
/* "node status" options */
|
||||||
bool is_shutdown;
|
bool is_shutdown;
|
||||||
@@ -134,7 +135,7 @@ typedef struct
|
|||||||
/* "standby register" options */ \
|
/* "standby register" options */ \
|
||||||
false, 0, \
|
false, 0, \
|
||||||
/* "standby switchover" options */ \
|
/* "standby switchover" options */ \
|
||||||
"", false, false, \
|
"", false, false, false, \
|
||||||
/* "node status" options */ \
|
/* "node status" options */ \
|
||||||
false, \
|
false, \
|
||||||
/* "node check" options */ \
|
/* "node check" options */ \
|
||||||
|
|||||||
@@ -401,6 +401,7 @@ main(int argc, char **argv)
|
|||||||
/* "standby switchover" options *
|
/* "standby switchover" options *
|
||||||
* ---------------------------- */
|
* ---------------------------- */
|
||||||
|
|
||||||
|
/* -C/--remote-config-file */
|
||||||
case 'C':
|
case 'C':
|
||||||
strncpy(runtime_options.remote_config_file, optarg, MAXPGPATH);
|
strncpy(runtime_options.remote_config_file, optarg, MAXPGPATH);
|
||||||
break;
|
break;
|
||||||
@@ -413,6 +414,10 @@ main(int argc, char **argv)
|
|||||||
runtime_options.force_rewind = true;
|
runtime_options.force_rewind = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_SIBLINGS_FOLLOW:
|
||||||
|
runtime_options.siblings_follow = true;
|
||||||
|
break;
|
||||||
|
|
||||||
/* "node status" options *
|
/* "node status" options *
|
||||||
* --------------------- */
|
* --------------------- */
|
||||||
|
|
||||||
@@ -1178,11 +1183,11 @@ check_cli_parameters(const int action)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case NODE_REJOIN:
|
case NODE_REJOIN:
|
||||||
if (runtime_options.upstream_conninfo[0] == '\0')
|
if (runtime_options.connection_param_provided == false)
|
||||||
{
|
{
|
||||||
item_list_append(
|
item_list_append(
|
||||||
&cli_errors,
|
&cli_errors,
|
||||||
"--upstream-conninfo must be provided with NODE REJOIN");
|
"database connection parameters for an available node must be provided when executing NODE REJOIN");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CLUSTER_SHOW:
|
case CLUSTER_SHOW:
|
||||||
|
|||||||
@@ -69,6 +69,7 @@
|
|||||||
#define OPT_OPTFORMAT 1033
|
#define OPT_OPTFORMAT 1033
|
||||||
#define OPT_REPLICATION_LAG 1034
|
#define OPT_REPLICATION_LAG 1034
|
||||||
#define OPT_CONFIG_FILES 1035
|
#define OPT_CONFIG_FILES 1035
|
||||||
|
#define OPT_SIBLINGS_FOLLOW 1036
|
||||||
/* deprecated since 3.3 */
|
/* deprecated since 3.3 */
|
||||||
#define OPT_DATA_DIR 999
|
#define OPT_DATA_DIR 999
|
||||||
#define OPT_NO_CONNINFO_PASSWORD 998
|
#define OPT_NO_CONNINFO_PASSWORD 998
|
||||||
@@ -130,6 +131,7 @@ static struct option long_options[] =
|
|||||||
{"remote-config-file", required_argument, NULL, 'C'},
|
{"remote-config-file", required_argument, NULL, 'C'},
|
||||||
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE },
|
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE },
|
||||||
{"force-rewind", no_argument, NULL, OPT_FORCE_REWIND },
|
{"force-rewind", no_argument, NULL, OPT_FORCE_REWIND },
|
||||||
|
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW },
|
||||||
|
|
||||||
/* "node status" options */
|
/* "node status" options */
|
||||||
{"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN },
|
{"is-shutdown", no_argument, NULL, OPT_IS_SHUTDOWN },
|
||||||
|
|||||||
Reference in New Issue
Block a user