mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 00:46:29 +00:00
Add some hopefully correct comments for future reference
Also some debugging output which will need to be removed later.
This commit is contained in:
45
repmgrd.c
45
repmgrd.c
@@ -20,6 +20,9 @@
|
|||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* ZZZ - remove superfluous debugging output */
|
||||||
|
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@@ -424,7 +427,10 @@ main(int argc, char **argv)
|
|||||||
if (my_local_mode == WITNESS_MODE)
|
if (my_local_mode == WITNESS_MODE)
|
||||||
witness_monitor();
|
witness_monitor();
|
||||||
else if (my_local_mode == STANDBY_MODE)
|
else if (my_local_mode == STANDBY_MODE)
|
||||||
|
{
|
||||||
standby_monitor();
|
standby_monitor();
|
||||||
|
log_debug(_("returned from standby_monitor()\n"));
|
||||||
|
}
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
|
|
||||||
if (got_SIGHUP)
|
if (got_SIGHUP)
|
||||||
@@ -441,6 +447,10 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
if(failover_done)
|
||||||
|
{
|
||||||
|
log_debug(_("standby check loop will terminate\n"));
|
||||||
|
}
|
||||||
} while (!failover_done);
|
} while (!failover_done);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -448,6 +458,8 @@ main(int argc, char **argv)
|
|||||||
local_options.node);
|
local_options.node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_debug(_("end of main loop\n"));
|
||||||
|
|
||||||
failover_done = false;
|
failover_done = false;
|
||||||
|
|
||||||
} while (true);
|
} while (true);
|
||||||
@@ -678,6 +690,7 @@ standby_monitor(void)
|
|||||||
* and a new primary_conn
|
* and a new primary_conn
|
||||||
*/
|
*/
|
||||||
do_failover();
|
do_failover();
|
||||||
|
log_debug("standby_monitor() - returning from do_failover()\n");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -685,11 +698,22 @@ standby_monitor(void)
|
|||||||
/* Check if we still are a standby, we could have been promoted */
|
/* Check if we still are a standby, we could have been promoted */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
log_debug("standby_monitor() - checking if still standby\n");
|
||||||
ret = is_standby(my_local_conn);
|
ret = is_standby(my_local_conn);
|
||||||
|
|
||||||
switch (ret)
|
switch (ret)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
|
/*
|
||||||
|
* This situation can occur if `pg_ctl promote` was manually executed
|
||||||
|
* on the node. If the original master is still running after this
|
||||||
|
* node has been promoted, we're in a "two brain" situation which
|
||||||
|
* will require manual resolution as there's no way of determining
|
||||||
|
* which master is the correct one.
|
||||||
|
*
|
||||||
|
* XXX check if the original master is still active and display a
|
||||||
|
* warning
|
||||||
|
*/
|
||||||
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
log_err(_("It seems like we have been promoted, so exit from monitoring...\n"));
|
||||||
terminate(1);
|
terminate(1);
|
||||||
break;
|
break;
|
||||||
@@ -910,6 +934,7 @@ do_failover(void)
|
|||||||
/* Query all the nodes to determine which ones are ready */
|
/* Query all the nodes to determine which ones are ready */
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_nodes; i++)
|
||||||
{
|
{
|
||||||
|
log_debug("checking node %i...\n", nodes[i].node_id);
|
||||||
/* if the node is not visible, skip it */
|
/* if the node is not visible, skip it */
|
||||||
if (!nodes[i].is_visible)
|
if (!nodes[i].is_visible)
|
||||||
continue;
|
continue;
|
||||||
@@ -975,13 +1000,14 @@ do_failover(void)
|
|||||||
update_shared_memory(last_wal_standby_applied);
|
update_shared_memory(last_wal_standby_applied);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
update_shared_memory(PQgetvalue(res, 0, 0));
|
update_shared_memory(PQgetvalue(res, 0, 0));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/* Wait for each node to come up and report a valid LSN */
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_nodes; i++)
|
||||||
{
|
{
|
||||||
|
log_debug(_("is_ready check for node %i\n"), nodes[i].node_id);
|
||||||
while (!nodes[i].is_ready)
|
while (!nodes[i].is_ready)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -1003,6 +1029,7 @@ do_failover(void)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/* if the node is ready there is nothing to check, skip it too */
|
/* if the node is ready there is nothing to check, skip it too */
|
||||||
|
/* ZZZ is this check pointless? */
|
||||||
if (nodes[i].is_ready)
|
if (nodes[i].is_ready)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -1038,17 +1065,27 @@ do_failover(void)
|
|||||||
|
|
||||||
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
|
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
|
||||||
|
|
||||||
/* If position is 0/0, check for format error, otherwise continue loop */
|
/* If position reported as "invalid", check for format error or
|
||||||
|
* empty string; otherwise position is 0/0 and we need to continue
|
||||||
|
* looping until a valid LSN is reported
|
||||||
|
*/
|
||||||
if(xlog_recptr == InvalidXLogRecPtr)
|
if(xlog_recptr == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
log_debug("Invalid LSN returned - '%s'", PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
if(lsn_format_ok == false)
|
if(lsn_format_ok == false)
|
||||||
{
|
{
|
||||||
|
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
||||||
if(*PQgetvalue(res, 0, 0) == '\0')
|
if(*PQgetvalue(res, 0, 0) == '\0')
|
||||||
{
|
{
|
||||||
log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
|
log_crit("Whoops, seems as if shared_preload_libraries=repmgr_funcs is not set!\n");
|
||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Very unlikely to happen; in the absence of any better
|
||||||
|
* strategy keep checking
|
||||||
|
*/
|
||||||
log_warning(_("Unable to parse LSN \"%s\"\n"),
|
log_warning(_("Unable to parse LSN \"%s\"\n"),
|
||||||
PQgetvalue(res, 0, 0));
|
PQgetvalue(res, 0, 0));
|
||||||
}
|
}
|
||||||
@@ -1056,6 +1093,7 @@ do_failover(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
|
|
||||||
|
/* If position is 0/0, keep checking */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1186,6 +1224,7 @@ do_failover(void)
|
|||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_debug("failover done\n");
|
||||||
/* to force it to re-calculate mode and master node */
|
/* to force it to re-calculate mode and master node */
|
||||||
failover_done = true;
|
failover_done = true;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user