mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
repmgrd: during failover, check if other nodes have seen the primary
In a situation where only some standbys are cut off from the primary, a failover would result in a split-brain/split-cluster situation, as it's likely one of the cut-off standbys will promote itself, and other cut-off standbys (but not all standbys) will follow it. To prevent this from happening, interrogate the other sibling nodes to check whether they've seen the primary within a reasonably short interval; if this is the case, do not take any failover action. This feature is experimental.
This commit is contained in:
13
dbutils.c
13
dbutils.c
@@ -4818,6 +4818,7 @@ init_replication_info(ReplInfo *replication_info)
|
||||
replication_info->replication_lag_time = 0;
|
||||
replication_info->receiving_streamed_wal = true;
|
||||
replication_info->wal_replay_paused = false;
|
||||
replication_info->primary_last_seen = -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -4844,7 +4845,8 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
|
||||
" END "
|
||||
" END AS replication_lag_time, "
|
||||
" last_wal_receive_lsn >= last_wal_replay_lsn AS receiving_streamed_wal, "
|
||||
" wal_replay_paused "
|
||||
" wal_replay_paused, "
|
||||
" primary_last_seen "
|
||||
" FROM ( "
|
||||
" SELECT CURRENT_TIMESTAMP AS ts, "
|
||||
" pg_catalog.pg_last_xact_replay_timestamp() AS last_xact_replay_timestamp, ");
|
||||
@@ -4858,7 +4860,7 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
|
||||
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
|
||||
" THEN FALSE "
|
||||
" ELSE pg_catalog.pg_is_wal_replay_paused() "
|
||||
" END AS wal_replay_paused ");
|
||||
" END AS wal_replay_paused, ");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -4880,10 +4882,14 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
|
||||
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
|
||||
" THEN FALSE "
|
||||
" ELSE pg_catalog.pg_is_xlog_replay_paused() "
|
||||
" END AS wal_replay_paused ");
|
||||
" END AS wal_replay_paused, ");
|
||||
}
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
|
||||
" THEN -1 "
|
||||
" ELSE repmgr.get_primary_last_seen() "
|
||||
" END AS primary_last_seen "
|
||||
" ) q ");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_replication_info():\n%s", query.data);
|
||||
@@ -4905,6 +4911,7 @@ get_replication_info(PGconn *conn, ReplInfo *replication_info)
|
||||
replication_info->replication_lag_time = atoi(PQgetvalue(res, 0, 4));
|
||||
replication_info->receiving_streamed_wal = atobool(PQgetvalue(res, 0, 5));
|
||||
replication_info->wal_replay_paused = atobool(PQgetvalue(res, 0, 6));
|
||||
replication_info->primary_last_seen = atoi(PQgetvalue(res, 0, 7));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
@@ -308,6 +308,7 @@ typedef struct
|
||||
int replication_lag_time;
|
||||
bool receiving_streamed_wal;
|
||||
bool wal_replay_paused;
|
||||
int primary_last_seen;
|
||||
} ReplInfo;
|
||||
|
||||
typedef struct
|
||||
|
||||
@@ -3103,6 +3103,9 @@ do_election(void)
|
||||
*/
|
||||
bool primary_location_seen = false;
|
||||
|
||||
|
||||
int nodes_with_primary_still_visible = 0;
|
||||
|
||||
electoral_term = get_current_term(local_conn);
|
||||
|
||||
if (electoral_term == -1)
|
||||
@@ -3306,7 +3309,7 @@ do_election(void)
|
||||
{
|
||||
/*
|
||||
* Theoretically the repmgrd on the node should have resumed WAL play
|
||||
* at this point
|
||||
* at this point.
|
||||
*/
|
||||
if (sibling_replication_info.last_wal_receive_lsn > sibling_replication_info.last_wal_replay_lsn)
|
||||
{
|
||||
@@ -3316,6 +3319,25 @@ do_election(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if node has seen primary "recently" - if so, we may have "partial primary visibility".
|
||||
* For now we'll assume the primary is visible if it's been seen less than
|
||||
* monitor_interval_secs * 2 seconds ago. We may need to adjust this, and/or make the value
|
||||
* configurable.
|
||||
*/
|
||||
|
||||
|
||||
if (sibling_replication_info.primary_last_seen < (config_file_options.monitor_interval_secs * 2))
|
||||
{
|
||||
nodes_with_primary_still_visible++;
|
||||
log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"),
|
||||
cell->node_info->node_id, sibling_replication_info.primary_last_seen);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("node %i last saw primary node %i second(s) ago"),
|
||||
cell->node_info->node_id, sibling_replication_info.primary_last_seen);
|
||||
}
|
||||
/* get node's last receive LSN - if "higher" than current winner, current node is candidate */
|
||||
cell->node_info->last_wal_receive_lsn = sibling_replication_info.last_wal_receive_lsn;
|
||||
|
||||
@@ -3397,9 +3419,23 @@ do_election(void)
|
||||
return ELECTION_CANCELLED;
|
||||
}
|
||||
|
||||
log_debug("visible nodes: %i; total nodes: %i",
|
||||
if (nodes_with_primary_still_visible > 0)
|
||||
{
|
||||
log_notice(_("%i nodes can seen the primary"), nodes_with_primary_still_visible);
|
||||
// XXX list nodes as detail
|
||||
|
||||
monitoring_state = MS_DEGRADED;
|
||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||
|
||||
reset_node_voting_status();
|
||||
|
||||
return ELECTION_CANCELLED;
|
||||
}
|
||||
|
||||
log_info(_("visible nodes: %i; total nodes: %i; no nodes have seen the primary within the last %i seconds"),
|
||||
visible_nodes,
|
||||
total_nodes);
|
||||
total_nodes,
|
||||
(config_file_options.monitor_interval_secs * 2));
|
||||
|
||||
if (visible_nodes <= (total_nodes / 2.0))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user