mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 08:36:30 +00:00
Minor refactoring of do_master_failover()
- rename some variables for clarity
- ensure all structures are initialised correctly
- update code comments
This commit is contained in:
69
repmgrd.c
69
repmgrd.c
@@ -315,8 +315,9 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* MAIN LOOP This loops cycles at startup and once per failover and
|
* MAIN LOOP This loops cycles at startup and once per failover and
|
||||||
* Requisites: - my_local_conn needs to be already setted with an active
|
* Requisites:
|
||||||
* connection - no master connection
|
* - my_local_conn must have an active connection to the monitored node
|
||||||
|
* - master_conn must not be open
|
||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
@@ -1253,7 +1254,7 @@ do_master_failover(void)
|
|||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
int total_nodes = 0;
|
int total_active_nodes = 0;
|
||||||
int visible_nodes = 0;
|
int visible_nodes = 0;
|
||||||
int ready_nodes = 0;
|
int ready_nodes = 0;
|
||||||
|
|
||||||
@@ -1284,7 +1285,7 @@ do_master_failover(void)
|
|||||||
"SELECT id, conninfo, type, upstream_node_id "
|
"SELECT id, conninfo, type, upstream_node_id "
|
||||||
" FROM %s.repl_nodes "
|
" FROM %s.repl_nodes "
|
||||||
" WHERE cluster = '%s' "
|
" WHERE cluster = '%s' "
|
||||||
" AND active IS TRUE "
|
" AND active IS TRUE "
|
||||||
" AND priority > 0 "
|
" AND priority > 0 "
|
||||||
" ORDER BY priority DESC, id "
|
" ORDER BY priority DESC, id "
|
||||||
" LIMIT %i ",
|
" LIMIT %i ",
|
||||||
@@ -1300,32 +1301,25 @@ do_master_failover(void)
|
|||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
total_active_nodes = PQntuples(res);
|
||||||
* total nodes that are registered
|
log_debug(_("%d active nodes registered\n"), total_active_nodes);
|
||||||
*/
|
|
||||||
total_nodes = PQntuples(res);
|
|
||||||
log_debug(_("%d active nodes registered\n"), total_nodes);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build an array with the nodes and indicate which ones are visible and
|
* Build an array with the nodes and indicate which ones are visible and
|
||||||
* ready
|
* ready
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
|
char node_type[MAXLEN];
|
||||||
|
|
||||||
|
nodes[i] = (t_node_info) T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
|
nodes[i].node_id = atoi(PQgetvalue(res, i, 0));
|
||||||
|
|
||||||
strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXCONNINFO);
|
strncpy(nodes[i].conninfo_str, PQgetvalue(res, i, 1), MAXCONNINFO);
|
||||||
|
strncpy(node_type, PQgetvalue(res, i, 2), MAXLEN);
|
||||||
|
|
||||||
nodes[i].type = parse_node_type(PQgetvalue(res, i, 2));
|
nodes[i].type = parse_node_type(node_type);
|
||||||
|
|
||||||
/* Copy details of the failed node */
|
|
||||||
/* XXX only node_id is actually used later */
|
|
||||||
if (nodes[i].type == MASTER)
|
|
||||||
{
|
|
||||||
failed_master.node_id = nodes[i].node_id;
|
|
||||||
failed_master.xlog_location = nodes[i].xlog_location;
|
|
||||||
failed_master.is_ready = nodes[i].is_ready;
|
|
||||||
}
|
|
||||||
|
|
||||||
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
|
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
|
||||||
|
|
||||||
@@ -1336,12 +1330,21 @@ do_master_failover(void)
|
|||||||
nodes[i].is_visible = false;
|
nodes[i].is_visible = false;
|
||||||
nodes[i].is_ready = false;
|
nodes[i].is_ready = false;
|
||||||
|
|
||||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
/* Copy details of the failed master node */
|
||||||
|
/* XXX only node_id is actually used later */
|
||||||
|
if (nodes[i].type == MASTER)
|
||||||
|
{
|
||||||
|
failed_master.node_id = nodes[i].node_id;
|
||||||
|
failed_master.xlog_location = nodes[i].xlog_location;
|
||||||
|
failed_master.is_ready = nodes[i].is_ready;
|
||||||
|
}
|
||||||
|
|
||||||
log_debug(_("node=%d conninfo=\"%s\" type=%s\n"),
|
log_debug(_("node=%i conninfo=\"%s\" type=%s\n"),
|
||||||
nodes[i].node_id, nodes[i].conninfo_str,
|
nodes[i].node_id,
|
||||||
PQgetvalue(res, i, 2));
|
nodes[i].conninfo_str,
|
||||||
|
node_type);
|
||||||
|
|
||||||
|
/* XXX do we need to try and connect to the master here? */
|
||||||
node_conn = establish_db_connection(nodes[i].conninfo_str, false);
|
node_conn = establish_db_connection(nodes[i].conninfo_str, false);
|
||||||
|
|
||||||
/* if we can't see the node just skip it */
|
/* if we can't see the node just skip it */
|
||||||
@@ -1361,13 +1364,13 @@ do_master_failover(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
|
log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
|
||||||
total_nodes, visible_nodes);
|
total_active_nodes, visible_nodes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Am I on the group that should keep alive? If I see less than half of
|
* Am I on the group that should keep alive? If I see less than half of
|
||||||
* total_nodes then I should do nothing
|
* total_active_nodes then I should do nothing
|
||||||
*/
|
*/
|
||||||
if (visible_nodes < (total_nodes / 2.0))
|
if (visible_nodes < (total_active_nodes / 2.0))
|
||||||
{
|
{
|
||||||
log_err(_("Unable to reach most of the nodes.\n"
|
log_err(_("Unable to reach most of the nodes.\n"
|
||||||
"Let the other standby servers decide which one will be the master.\n"
|
"Let the other standby servers decide which one will be the master.\n"
|
||||||
@@ -1376,7 +1379,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Query all available nodes to determine readiness and LSN */
|
/* Query all available nodes to determine readiness and LSN */
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
log_debug("checking node %i...\n", nodes[i].node_id);
|
log_debug("checking node %i...\n", nodes[i].node_id);
|
||||||
|
|
||||||
@@ -1454,7 +1457,7 @@ do_master_failover(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Wait for each node to come up and report a valid LSN */
|
/* Wait for each node to come up and report a valid LSN */
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* ensure witness server is marked as ready, and skip
|
* ensure witness server is marked as ready, and skip
|
||||||
@@ -1614,7 +1617,7 @@ do_master_failover(void)
|
|||||||
/*
|
/*
|
||||||
* determine which one is the best candidate to promote to master
|
* determine which one is the best candidate to promote to master
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
/* witness server can never be a candidate */
|
/* witness server can never be a candidate */
|
||||||
if (nodes[i].type == WITNESS)
|
if (nodes[i].type == WITNESS)
|
||||||
@@ -1839,8 +1842,10 @@ do_master_failover(void)
|
|||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* to force it to re-calculate mode and master node */
|
/*
|
||||||
// ^ ZZZ check that behaviour ^
|
* setting "failover_done" to true will cause the node's monitoring loop
|
||||||
|
* to restart in the appropriate mode for the node's (possibly new) role
|
||||||
|
*/
|
||||||
failover_done = true;
|
failover_done = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user