mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 00:26:30 +00:00
repmgrd: ensure local node is counted as quorum member
Rename "standby_nodes" to "sibling_nodes" to make it clearer in the code what total is actually provided by the struct. Addresses GitHub #439.
This commit is contained in:
@@ -58,7 +58,7 @@ static FailoverState failover_state = FAILOVER_STATE_UNKNOWN;
|
|||||||
|
|
||||||
static int primary_node_id = UNKNOWN_NODE_ID;
|
static int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
static NodeInfoList standby_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
static NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
|
|
||||||
|
|
||||||
static ElectionResult do_election(void);
|
static ElectionResult do_election(void);
|
||||||
@@ -941,8 +941,8 @@ monitor_streaming_standby(void)
|
|||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
former_upstream_node_id,
|
former_upstream_node_id,
|
||||||
&standby_nodes);
|
&sibling_nodes);
|
||||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||||
|
|
||||||
/* this will restart monitoring in primary mode */
|
/* this will restart monitoring in primary mode */
|
||||||
monitoring_state = MS_NORMAL;
|
monitoring_state = MS_NORMAL;
|
||||||
@@ -981,12 +981,12 @@ monitor_streaming_standby(void)
|
|||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
local_node_info.upstream_node_id,
|
local_node_info.upstream_node_id,
|
||||||
&standby_nodes);
|
&sibling_nodes);
|
||||||
|
|
||||||
if (standby_nodes.node_count > 0)
|
if (sibling_nodes.node_count > 0)
|
||||||
{
|
{
|
||||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
/* skip local node check, we did that above */
|
/* skip local node check, we did that above */
|
||||||
if (cell->node_info->node_id == local_node_info.node_id)
|
if (cell->node_info->node_id == local_node_info.node_id)
|
||||||
@@ -1016,7 +1016,7 @@ monitor_streaming_standby(void)
|
|||||||
follow_new_primary(follow_node_id);
|
follow_new_primary(follow_node_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_node_info_list(&standby_nodes);
|
clear_node_info_list(&sibling_nodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1418,12 +1418,12 @@ monitor_streaming_witness(void)
|
|||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
local_node_info.upstream_node_id,
|
local_node_info.upstream_node_id,
|
||||||
&standby_nodes);
|
&sibling_nodes);
|
||||||
|
|
||||||
if (standby_nodes.node_count > 0)
|
if (sibling_nodes.node_count > 0)
|
||||||
{
|
{
|
||||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
/* skip local node check, we did that above */
|
/* skip local node check, we did that above */
|
||||||
if (cell->node_info->node_id == local_node_info.node_id)
|
if (cell->node_info->node_id == local_node_info.node_id)
|
||||||
@@ -1453,7 +1453,7 @@ monitor_streaming_witness(void)
|
|||||||
witness_follow_new_primary(follow_node_id);
|
witness_follow_new_primary(follow_node_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_node_info_list(&standby_nodes);
|
clear_node_info_list(&sibling_nodes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
loop:
|
loop:
|
||||||
@@ -1554,7 +1554,7 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
else if (election_result == ELECTION_WON)
|
else if (election_result == ELECTION_WON)
|
||||||
{
|
{
|
||||||
if (standby_nodes.node_count > 0)
|
if (sibling_nodes.node_count > 0)
|
||||||
{
|
{
|
||||||
log_notice("this node is the winner, will now promote itself and inform other nodes");
|
log_notice("this node is the winner, will now promote itself and inform other nodes");
|
||||||
}
|
}
|
||||||
@@ -1599,7 +1599,7 @@ do_primary_failover(void)
|
|||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
&standby_nodes);
|
&sibling_nodes);
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (config_file_options.failover == FAILOVER_MANUAL)
|
else if (config_file_options.failover == FAILOVER_MANUAL)
|
||||||
@@ -1661,10 +1661,10 @@ do_primary_failover(void)
|
|||||||
{
|
{
|
||||||
case FAILOVER_STATE_PROMOTED:
|
case FAILOVER_STATE_PROMOTED:
|
||||||
/* notify former siblings that they should now follow this node */
|
/* notify former siblings that they should now follow this node */
|
||||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||||
|
|
||||||
/* we no longer care about our former siblings */
|
/* we no longer care about our former siblings */
|
||||||
clear_node_info_list(&standby_nodes);
|
clear_node_info_list(&sibling_nodes);
|
||||||
|
|
||||||
/* pass control back down to start_monitoring() */
|
/* pass control back down to start_monitoring() */
|
||||||
log_info(_("switching to primary monitoring mode"));
|
log_info(_("switching to primary monitoring mode"));
|
||||||
@@ -1678,10 +1678,10 @@ do_primary_failover(void)
|
|||||||
* notify siblings that they should resume following the original
|
* notify siblings that they should resume following the original
|
||||||
* primary
|
* primary
|
||||||
*/
|
*/
|
||||||
notify_followers(&standby_nodes, upstream_node_info.node_id);
|
notify_followers(&sibling_nodes, upstream_node_info.node_id);
|
||||||
|
|
||||||
/* we no longer care about our former siblings */
|
/* we no longer care about our former siblings */
|
||||||
clear_node_info_list(&standby_nodes);
|
clear_node_info_list(&sibling_nodes);
|
||||||
|
|
||||||
/* pass control back down to start_monitoring() */
|
/* pass control back down to start_monitoring() */
|
||||||
log_info(_("resuming standby monitoring mode"));
|
log_info(_("resuming standby monitoring mode"));
|
||||||
@@ -2566,6 +2566,7 @@ do_election(void)
|
|||||||
|
|
||||||
/* we're visible */
|
/* we're visible */
|
||||||
int visible_nodes = 1;
|
int visible_nodes = 1;
|
||||||
|
int total_nodes = 0;
|
||||||
|
|
||||||
NodeInfoListCell *cell = NULL;
|
NodeInfoListCell *cell = NULL;
|
||||||
|
|
||||||
@@ -2616,14 +2617,16 @@ do_election(void)
|
|||||||
get_active_sibling_node_records(local_conn,
|
get_active_sibling_node_records(local_conn,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
&standby_nodes);
|
&sibling_nodes);
|
||||||
|
|
||||||
|
total_nodes = sibling_nodes.node_count + 1;
|
||||||
|
|
||||||
log_debug("do_election(): primary location is %s", upstream_node_info.location);
|
log_debug("do_election(): primary location is %s", upstream_node_info.location);
|
||||||
|
|
||||||
local_node_info.last_wal_receive_lsn = InvalidXLogRecPtr;
|
local_node_info.last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||||
|
|
||||||
/* fast path if no other standbys (or witness) exists - normally win by default */
|
/* fast path if no other standbys (or witness) exists - normally win by default */
|
||||||
if (standby_nodes.node_count == 0)
|
if (sibling_nodes.node_count == 0)
|
||||||
{
|
{
|
||||||
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
||||||
{
|
{
|
||||||
@@ -2666,7 +2669,7 @@ do_election(void)
|
|||||||
/* pointer to "winning" node, initially self */
|
/* pointer to "winning" node, initially self */
|
||||||
candidate_node = &local_node_info;
|
candidate_node = &local_node_info;
|
||||||
|
|
||||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
/* assume the worst case */
|
/* assume the worst case */
|
||||||
cell->node_info->node_status = NODE_STATUS_UNKNOWN;
|
cell->node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||||
@@ -2721,7 +2724,7 @@ do_election(void)
|
|||||||
candidate_node = cell->node_info;
|
candidate_node = cell->node_info;
|
||||||
}
|
}
|
||||||
/* LSN is same - tiebreak on priority, then node_id */
|
/* LSN is same - tiebreak on priority, then node_id */
|
||||||
else if(cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
else if (cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i",
|
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i",
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
@@ -2773,9 +2776,9 @@ do_election(void)
|
|||||||
|
|
||||||
log_debug("visible nodes: %i; total nodes: %i",
|
log_debug("visible nodes: %i; total nodes: %i",
|
||||||
visible_nodes,
|
visible_nodes,
|
||||||
standby_nodes.node_count);
|
total_nodes);
|
||||||
|
|
||||||
if (visible_nodes <= (standby_nodes.node_count / 2.0))
|
if (visible_nodes <= (total_nodes / 2.0))
|
||||||
{
|
{
|
||||||
log_notice(_("unable to reach a qualified majority of nodes"));
|
log_notice(_("unable to reach a qualified majority of nodes"));
|
||||||
log_detail(_("node will enter degraded monitoring state waiting for reconnect"));
|
log_detail(_("node will enter degraded monitoring state waiting for reconnect"));
|
||||||
|
|||||||
Reference in New Issue
Block a user