mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
Separate promotion candidate walsender/slot checks into discrete functions
For use by "standby promote" as well as "standby follow"
This commit is contained in:
@@ -115,6 +115,9 @@ static char *make_barman_ssh_command(char *buf);
|
|||||||
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, char *dest, bool as_file);
|
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, char *dest, bool as_file);
|
||||||
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
|
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
|
||||||
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
static bool check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
||||||
|
static bool check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats);
|
||||||
|
static bool check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats);
|
||||||
|
|
||||||
static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
|
||||||
|
|
||||||
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
|
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
|
||||||
@@ -2031,6 +2034,13 @@ do_standby_promote(void)
|
|||||||
|
|
||||||
int existing_primary_id = UNKNOWN_NODE_ID;
|
int existing_primary_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
|
SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
|
||||||
|
int available_wal_senders = 0;
|
||||||
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
|
||||||
log_verbose(LOG_INFO, _("connected to standby, checking its state"));
|
log_verbose(LOG_INFO, _("connected to standby, checking its state"));
|
||||||
@@ -2061,6 +2071,17 @@ do_standby_promote(void)
|
|||||||
log_info(_("node is a standby"));
|
log_info(_("node is a standby"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_record);
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve node record for node %i"),
|
||||||
|
config_file_options.node_id);
|
||||||
|
|
||||||
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Executing "pg_ctl ... promote" when WAL replay is paused and
|
* Executing "pg_ctl ... promote" when WAL replay is paused and
|
||||||
* WAL is pending replay will mean the standby will not promote
|
* WAL is pending replay will mean the standby will not promote
|
||||||
@@ -2154,6 +2175,60 @@ do_standby_promote(void)
|
|||||||
|
|
||||||
PQfinish(current_primary_conn);
|
PQfinish(current_primary_conn);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* populate local node record with current state of various replication-related
|
||||||
|
* values, so we can check for sufficient walsenders and replication slots
|
||||||
|
*/
|
||||||
|
get_node_replication_stats(local_conn, &local_node_record);
|
||||||
|
|
||||||
|
available_wal_senders = local_node_record.max_wal_senders -
|
||||||
|
local_node_record.attached_wal_receivers;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get list of sibling nodes; if --siblings-follow specified,
|
||||||
|
* check they're reachable; if not, the list will be used to warn
|
||||||
|
* about nodes which will not follow the new primary
|
||||||
|
*/
|
||||||
|
get_active_sibling_node_records(local_conn,
|
||||||
|
local_node_record.node_id,
|
||||||
|
local_node_record.upstream_node_id,
|
||||||
|
&sibling_nodes);
|
||||||
|
|
||||||
|
if (check_sibling_nodes(&sibling_nodes, &sibling_nodes_stats) == false)
|
||||||
|
{
|
||||||
|
PQfinish(local_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* check there are sufficient free walsenders - obviously there's potential
|
||||||
|
* for a later race condition if some walsenders come into use before the
|
||||||
|
* promote operation gets around to attaching the sibling nodes, but
|
||||||
|
* this should catch any actual existing configuration issue (and if anyone's
|
||||||
|
* performing a promote in such an unstable environment, they only have
|
||||||
|
* themselves to blame).
|
||||||
|
*/
|
||||||
|
if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats) == false)
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == false)
|
||||||
|
{
|
||||||
|
PQfinish(local_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if replication slots are required by siblings,
|
||||||
|
* check the promotion candidate has sufficient free slots
|
||||||
|
*/
|
||||||
|
if (check_free_slots(&local_node_record, &sibling_nodes_stats) == false)
|
||||||
|
{
|
||||||
|
PQfinish(local_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
{
|
{
|
||||||
log_info(_("prerequisites for executing STANDBY PROMOTE are met"));
|
log_info(_("prerequisites for executing STANDBY PROMOTE are met"));
|
||||||
@@ -3182,7 +3257,6 @@ do_standby_switchover(void)
|
|||||||
/* store list of configuration files on the demotion candidate */
|
/* store list of configuration files on the demotion candidate */
|
||||||
KeyValueList remote_config_files = {NULL, NULL};
|
KeyValueList remote_config_files = {NULL, NULL};
|
||||||
|
|
||||||
/* store list of sibling nodes if --siblings-follow specified */
|
|
||||||
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
|
SiblingNodeStats sibling_nodes_stats = T_SIBLING_NODES_STATS_INITIALIZER;
|
||||||
|
|
||||||
@@ -3602,8 +3676,9 @@ do_standby_switchover(void)
|
|||||||
local_node_record.attached_wal_receivers;
|
local_node_record.attached_wal_receivers;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If --siblings-follow specified, get list and check they're reachable
|
* Get list of sibling nodes; if --siblings-follow specified,
|
||||||
* (if not just issue a warning)
|
* check they're reachable; if not, the list will be used to warn
|
||||||
|
* about nodes which will remain attached to the demotion candidate
|
||||||
*/
|
*/
|
||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_record.node_id,
|
local_node_record.node_id,
|
||||||
@@ -3616,6 +3691,7 @@ do_standby_switchover(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check there are sufficient free walsenders - obviously there's potential
|
* check there are sufficient free walsenders - obviously there's potential
|
||||||
* for a later race condition if some walsenders come into use before the
|
* for a later race condition if some walsenders come into use before the
|
||||||
@@ -3624,37 +3700,13 @@ do_standby_switchover(void)
|
|||||||
* performing a switchover in such an unstable environment, they only have
|
* performing a switchover in such an unstable environment, they only have
|
||||||
* themselves to blame).
|
* themselves to blame).
|
||||||
*/
|
*/
|
||||||
if (available_wal_senders < sibling_nodes_stats.min_required_wal_senders)
|
if (check_free_wal_senders(available_wal_senders, &sibling_nodes_stats) == false)
|
||||||
{
|
{
|
||||||
if (runtime_options.force == false || runtime_options.dry_run == true)
|
|
||||||
{
|
|
||||||
log_error(_("insufficient free walsenders on promotion candidate"));
|
|
||||||
log_detail(_("at least %i walsenders required but only %i free walsenders on promotion candidate"),
|
|
||||||
sibling_nodes_stats.min_required_wal_senders,
|
|
||||||
available_wal_senders);
|
|
||||||
log_hint(_("increase parameter \"max_wal_senders\" or use -F/--force to proceed in any case"));
|
|
||||||
|
|
||||||
if (runtime_options.dry_run == false)
|
if (runtime_options.dry_run == false)
|
||||||
{
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
log_warning(_("insufficient free walsenders on promotion candidate"));
|
PQfinish(local_conn);
|
||||||
log_detail(_("at least %i walsenders required but only %i free walsender(s) on promotion candidate"),
|
exit(ERR_BAD_CONFIG);
|
||||||
sibling_nodes_stats.min_required_wal_senders,
|
|
||||||
available_wal_senders);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (runtime_options.dry_run == true)
|
|
||||||
{
|
|
||||||
log_info(_("%i walsenders required, %i available"),
|
|
||||||
sibling_nodes_stats.min_required_wal_senders,
|
|
||||||
available_wal_senders);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3903,50 +3955,17 @@ do_standby_switchover(void)
|
|||||||
|
|
||||||
PQfinish(remote_conn);
|
PQfinish(remote_conn);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if replication slots are required by demotion candidate and/or siblings,
|
* if replication slots are required by demotion candidate and/or siblings,
|
||||||
* check the promotion candidate has sufficient free slots
|
* check the promotion candidate has sufficient free slots
|
||||||
*/
|
*/
|
||||||
|
if (check_free_slots(&local_node_record, &sibling_nodes_stats) == false)
|
||||||
if (sibling_nodes_stats.min_required_free_slots > 0 )
|
|
||||||
{
|
{
|
||||||
int available_slots = local_node_record.max_replication_slots -
|
PQfinish(local_conn);
|
||||||
local_node_record.active_replication_slots;
|
exit(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
log_debug("minimum of %i free slots (%i for siblings) required; %i available",
|
|
||||||
sibling_nodes_stats.min_required_free_slots,
|
|
||||||
sibling_nodes_stats.reachable_sibling_nodes_with_slot_count,
|
|
||||||
available_slots);
|
|
||||||
|
|
||||||
if (available_slots < sibling_nodes_stats.min_required_free_slots)
|
|
||||||
{
|
|
||||||
if (runtime_options.force == false || runtime_options.dry_run == true)
|
|
||||||
{
|
|
||||||
log_error(_("insufficient free replication slots to attach all nodes"));
|
|
||||||
log_detail(_("at least %i additional replication slots required but only %i free slots available on promotion candidate"),
|
|
||||||
sibling_nodes_stats.min_required_free_slots,
|
|
||||||
available_slots);
|
|
||||||
log_hint(_("increase parameter \"max_replication_slots\" or use -F/--force to proceed in any case"));
|
|
||||||
|
|
||||||
if (runtime_options.dry_run == false)
|
|
||||||
{
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (runtime_options.dry_run == true)
|
|
||||||
{
|
|
||||||
log_info(_("%i replication slots required, %i available"),
|
|
||||||
sibling_nodes_stats.min_required_free_slots,
|
|
||||||
available_slots);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt to pause all repmgrd instances, unless user explicitly
|
* Attempt to pause all repmgrd instances, unless user explicitly
|
||||||
* specifies not to.
|
* specifies not to.
|
||||||
@@ -6973,6 +6992,12 @@ write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For "standby promote" and "standby follow", check for sibling nodes.
|
||||||
|
* If "--siblings-follow" was specified, fill the provided SiblingNodeStats
|
||||||
|
* struct with some aggregate info about the nodes for later
|
||||||
|
* decision making.
|
||||||
|
*/
|
||||||
static bool
|
static bool
|
||||||
check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
|
check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
|
||||||
{
|
{
|
||||||
@@ -7106,6 +7131,91 @@ check_sibling_nodes(NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool
|
||||||
|
check_free_wal_senders(int available_wal_senders, SiblingNodeStats *sibling_nodes_stats)
|
||||||
|
{
|
||||||
|
if (available_wal_senders < sibling_nodes_stats->min_required_wal_senders)
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false || runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_error(_("insufficient free walsenders on promotion candidate"));
|
||||||
|
log_detail(_("at least %i walsenders required but only %i free walsenders on promotion candidate"),
|
||||||
|
sibling_nodes_stats->min_required_wal_senders,
|
||||||
|
available_wal_senders);
|
||||||
|
log_hint(_("increase parameter \"max_wal_senders\" or use -F/--force to proceed in any case"));
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == false)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("insufficient free walsenders on promotion candidate"));
|
||||||
|
log_detail(_("at least %i walsenders required but only %i free walsender(s) on promotion candidate"),
|
||||||
|
sibling_nodes_stats->min_required_wal_senders,
|
||||||
|
available_wal_senders);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("%i walsenders required, %i available"),
|
||||||
|
sibling_nodes_stats->min_required_wal_senders,
|
||||||
|
available_wal_senders);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static bool
|
||||||
|
check_free_slots(t_node_info *local_node_record, SiblingNodeStats *sibling_nodes_stats)
|
||||||
|
{
|
||||||
|
if (sibling_nodes_stats->min_required_free_slots > 0 )
|
||||||
|
{
|
||||||
|
int available_slots = local_node_record->max_replication_slots -
|
||||||
|
local_node_record->active_replication_slots;
|
||||||
|
|
||||||
|
log_debug("minimum of %i free slots (%i for siblings) required; %i available",
|
||||||
|
sibling_nodes_stats->min_required_free_slots,
|
||||||
|
sibling_nodes_stats->reachable_sibling_nodes_with_slot_count,
|
||||||
|
available_slots);
|
||||||
|
|
||||||
|
if (available_slots < sibling_nodes_stats->min_required_free_slots)
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false || runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_error(_("insufficient free replication slots to attach all nodes"));
|
||||||
|
log_detail(_("at least %i additional replication slots required but only %i free slots available on promotion candidate"),
|
||||||
|
sibling_nodes_stats->min_required_free_slots,
|
||||||
|
available_slots);
|
||||||
|
log_hint(_("increase parameter \"max_replication_slots\" or use -F/--force to proceed in any case"));
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == false)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("%i replication slots required, %i available"),
|
||||||
|
sibling_nodes_stats->min_required_free_slots,
|
||||||
|
available_slots);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
|
sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user