mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
repmgrd: improve primary visibility consensus check
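When counting the nodes which can still see the primary, exclude any sibling node which reports that it is following a different upstream node, and log a warning instead. This shouldn't happen, but could.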
This commit is contained in:
15
dbutils.c
15
dbutils.c
@@ -4993,7 +4993,8 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
||||
" END AS replication_lag_time, "
|
||||
" last_wal_receive_lsn >= last_wal_replay_lsn AS receiving_streamed_wal, "
|
||||
" wal_replay_paused, "
|
||||
" upstream_last_seen "
|
||||
" upstream_last_seen, "
|
||||
" upstream_node_id "
|
||||
" FROM ( "
|
||||
" SELECT CURRENT_TIMESTAMP AS ts, "
|
||||
" pg_catalog.pg_is_in_recovery() AS in_recovery, "
|
||||
@@ -5033,10 +5034,12 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
||||
" END AS wal_replay_paused, ");
|
||||
}
|
||||
|
||||
/* Add information about upstream node from shared memory */
|
||||
if (node_type == WITNESS)
|
||||
{
|
||||
appendPQExpBufferStr(&query,
|
||||
" repmgr.get_upstream_last_seen() AS upstream_last_seen");
|
||||
" repmgr.get_upstream_last_seen() AS upstream_last_seen, "
|
||||
" repmgr.get_upstream_node_id() AS upstream_node_id ");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -5044,7 +5047,12 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
||||
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
|
||||
" THEN -1 "
|
||||
" ELSE repmgr.get_upstream_last_seen() "
|
||||
" END AS upstream_last_seen ");
|
||||
" END AS upstream_last_seen, ");
|
||||
appendPQExpBufferStr(&query,
|
||||
" CASE WHEN pg_catalog.pg_is_in_recovery() IS FALSE "
|
||||
" THEN -1 "
|
||||
" ELSE repmgr.get_upstream_node_id() "
|
||||
" END AS upstream_node_id ");
|
||||
}
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
@@ -5075,6 +5083,7 @@ get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replicatio
|
||||
replication_info->receiving_streamed_wal = atobool(PQgetvalue(res, 0, 6));
|
||||
replication_info->wal_replay_paused = atobool(PQgetvalue(res, 0, 7));
|
||||
replication_info->upstream_last_seen = atoi(PQgetvalue(res, 0, 8));
|
||||
replication_info->upstream_node_id = atoi(PQgetvalue(res, 0, 9));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
@@ -310,6 +310,7 @@ typedef struct
|
||||
bool receiving_streamed_wal;
|
||||
bool wal_replay_paused;
|
||||
int upstream_last_seen;
|
||||
int upstream_node_id;
|
||||
} ReplInfo;
|
||||
|
||||
typedef struct
|
||||
|
||||
4
repmgr.c
4
repmgr.c
@@ -436,10 +436,6 @@ get_upstream_node_id(PG_FUNCTION_ARGS)
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
/* A primary node cannot have an upstream ID */
|
||||
if (!RecoveryInProgress())
|
||||
PG_RETURN_INT32(UNKNOWN_NODE_ID);
|
||||
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
upstream_node_id = shared_state->upstream_node_id;
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
@@ -3633,15 +3633,25 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
||||
|
||||
if (sibling_replication_info.upstream_last_seen >= 0 && sibling_replication_info.upstream_last_seen < (config_file_options.monitor_interval_secs * 2))
|
||||
{
|
||||
nodes_with_primary_still_visible++;
|
||||
log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"),
|
||||
cell->node_info->node_id,
|
||||
sibling_replication_info.upstream_last_seen);
|
||||
appendPQExpBuffer(&nodes_with_primary_visible,
|
||||
" - node \"%s\" (ID: %i): %i second(s) ago\n",
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id,
|
||||
sibling_replication_info.upstream_last_seen);
|
||||
if (sibling_replication_info.upstream_node_id != upstream_node_info.node_id)
|
||||
{
|
||||
log_warning(_("assumed sibling node %i monitoring different upstream node %i"),
|
||||
cell->node_info->node_id,
|
||||
sibling_replication_info.upstream_node_id);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
nodes_with_primary_still_visible++;
|
||||
log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"),
|
||||
cell->node_info->node_id,
|
||||
sibling_replication_info.upstream_last_seen);
|
||||
appendPQExpBuffer(&nodes_with_primary_visible,
|
||||
" - node \"%s\" (ID: %i): %i second(s) ago\n",
|
||||
cell->node_info->node_name,
|
||||
cell->node_info->node_id,
|
||||
sibling_replication_info.upstream_last_seen);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user