repmgrd: forcibly resume WAL replay if paused

If WAL replay is paused, and there is WAL pending replay, a promote command
will be queued until replay is resumed.

As it's conceivable that there are corner cases where one standby with
replay paused has actually received the most WAL, we'll forcibly
resume WAL replay so it can be reliably promoted, if needed.

Related to GitHub #540.
This commit is contained in:
Ian Barwick
2019-02-07 11:33:57 +09:00
parent c7b325e2a4
commit c4332d9a52

View File

@@ -3090,6 +3090,8 @@ do_election(void)
t_node_info *candidate_node = NULL; t_node_info *candidate_node = NULL;
ReplInfo local_replication_info;
/* /*
* Check if at least one server in the primary's location is visible; if * Check if at least one server in the primary's location is visible; if
* not we'll assume a network split between this node and the primary * not we'll assume a network split between this node and the primary
@@ -3182,7 +3184,37 @@ do_election(void)
} }
/* get our lsn */ /* get our lsn */
local_node_info.last_wal_receive_lsn = get_last_wal_receive_location(local_conn); if (get_replication_info(local_conn, &local_replication_info) == false)
{
log_error(_("unable to retrieve replication information for local node"));
return ELECTION_LOST;
}
/* check if WAL replay on local node is paused */
if (local_replication_info.wal_replay_paused == true)
{
log_debug("WAL replay is paused");
if (local_replication_info.last_wal_receive_lsn > local_replication_info.last_wal_replay_lsn)
{
log_warning(_("WAL replay on this node is paused and WAL is pending replay"));
log_detail(_("replay paused at %X/%X; last WAL received is %X/%X"),
format_lsn(local_replication_info.last_wal_replay_lsn),
format_lsn(local_replication_info.last_wal_receive_lsn));
}
/* attempt to resume WAL replay - unlikely this will fail, but just in case */
if (resume_wal_replay(local_conn) == false)
{
log_error(_("unable to resume WAL replay"));
log_detail(_("this node cannot be reliably promoted"));
return ELECTION_LOST;
}
log_notice(_("WAL replay forcibly resumed"));
}
local_node_info.last_wal_receive_lsn = local_replication_info.last_wal_receive_lsn;
log_debug("our last receive lsn: %X/%X", format_lsn(local_node_info.last_wal_receive_lsn)); log_debug("our last receive lsn: %X/%X", format_lsn(local_node_info.last_wal_receive_lsn));
@@ -3191,6 +3223,8 @@ do_election(void)
for (cell = sibling_nodes.head; cell; cell = cell->next) for (cell = sibling_nodes.head; cell; cell = cell->next)
{ {
ReplInfo sibling_replication_info;
/* assume the worst case */ /* assume the worst case */
cell->node_info->node_status = NODE_STATUS_UNKNOWN; cell->node_info->node_status = NODE_STATUS_UNKNOWN;
@@ -3227,60 +3261,80 @@ do_election(void)
if (cell->node_info->priority == 0) if (cell->node_info->priority == 0)
{ {
log_debug("node %i has priority of 0, skipping", log_debug("node %i has priority of 0, skipping",
cell->node_info->node_id); cell->node_info->node_id);
continue;
} }
else
if (get_replication_info(cell->node_info->conn, &sibling_replication_info) == false)
{ {
/* get node's last receive LSN - if "higher" than current winner, current node is candidate */ log_warning(_("unable to retrieve replication information for node %i, skipping"),
cell->node_info->last_wal_receive_lsn = get_last_wal_receive_location(cell->node_info->conn); cell->node_info->node_id);
continue;
}
log_verbose(LOG_DEBUG, "node %i's last receive LSN is: %X/%X", /* check if WAL replay on node is paused */
cell->node_info->node_id, if (sibling_replication_info.wal_replay_paused == true)
format_lsn(cell->node_info->last_wal_receive_lsn)); {
/*
/* compare LSN */ * Theoretically the repmgrd on the node should have resumed WAL play
if (cell->node_info->last_wal_receive_lsn > candidate_node->last_wal_receive_lsn) * at this point
*/
if (sibling_replication_info.last_wal_receive_lsn > sibling_replication_info.last_wal_replay_lsn)
{ {
/* other node is ahead */ log_warning(_("WAL replay on node %i is paused and WAL is pending replay"),
log_verbose(LOG_DEBUG, "node %i is ahead of current candidate %i", cell->node_info->node_id);
cell->node_info->node_id, }
candidate_node->node_id); }
/* get node's last receive LSN - if "higher" than current winner, current node is candidate */
cell->node_info->last_wal_receive_lsn = sibling_replication_info.last_wal_receive_lsn;
log_verbose(LOG_DEBUG, "node %i's last receive LSN is: %X/%X",
cell->node_info->node_id,
format_lsn(cell->node_info->last_wal_receive_lsn));
/* compare LSN */
if (cell->node_info->last_wal_receive_lsn > candidate_node->last_wal_receive_lsn)
{
/* other node is ahead */
log_verbose(LOG_DEBUG, "node %i is ahead of current candidate %i",
cell->node_info->node_id,
candidate_node->node_id);
candidate_node = cell->node_info;
}
/* LSN is same - tiebreak on priority, then node_id */
else if (cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
{
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i",
cell->node_info->node_id,
candidate_node->node_id);
if (cell->node_info->priority > candidate_node->priority)
{
log_verbose(LOG_DEBUG, "node %i has higher priority (%i) than current candidate %i (%i)",
cell->node_info->node_id,
cell->node_info->priority,
candidate_node->node_id,
candidate_node->priority);
candidate_node = cell->node_info; candidate_node = cell->node_info;
} }
/* LSN is same - tiebreak on priority, then node_id */ else if (cell->node_info->priority == candidate_node->priority)
else if (cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
{ {
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i", if (cell->node_info->node_id < candidate_node->node_id)
cell->node_info->node_id,
candidate_node->node_id);
if (cell->node_info->priority > candidate_node->priority)
{ {
log_verbose(LOG_DEBUG, "node %i has higher priority (%i) than current candidate %i (%i)", log_verbose(LOG_DEBUG, "node %i has same priority but lower node_id than current candidate %i",
cell->node_info->node_id, cell->node_info->node_id,
cell->node_info->priority, candidate_node->node_id);
candidate_node->node_id,
candidate_node->priority);
candidate_node = cell->node_info; candidate_node = cell->node_info;
} }
else if (cell->node_info->priority == candidate_node->priority) }
{ else
if (cell->node_info->node_id < candidate_node->node_id) {
{ log_verbose(LOG_DEBUG, "node %i has lower priority (%i) than current candidate %i (%i)",
log_verbose(LOG_DEBUG, "node %i has same priority but lower node_id than current candidate %i", cell->node_info->node_id,
cell->node_info->node_id, cell->node_info->priority,
candidate_node->node_id); candidate_node->node_id,
candidate_node = cell->node_info; candidate_node->priority);
}
}
else
{
log_verbose(LOG_DEBUG, "node %i has lower priority (%i) than current candidate %i (%i)",
cell->node_info->node_id,
cell->node_info->priority,
candidate_node->node_id,
candidate_node->priority);
}
} }
} }
} }