mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 00:46:29 +00:00
Make repmgrd survive to the failover
To do this it needs to reconnect to the new master
This commit is contained in:
239
repmgrd.c
239
repmgrd.c
@@ -81,6 +81,8 @@ bool verbose = false;
|
|||||||
bool monitoring_history = false;
|
bool monitoring_history = false;
|
||||||
char repmgr_schema[MAXLEN];
|
char repmgr_schema[MAXLEN];
|
||||||
|
|
||||||
|
bool failover_done = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* should initialize with {0} to be ANSI complaint ? but this raises
|
* should initialize with {0} to be ANSI complaint ? but this raises
|
||||||
* error with gcc -Wall
|
* error with gcc -Wall
|
||||||
@@ -203,63 +205,37 @@ main(int argc, char **argv)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set my server mode, establish a connection to primary
|
* MAIN LOOP
|
||||||
* and start monitor
|
* This loops cicles once per failover and at startup
|
||||||
*/
|
* Requisites:
|
||||||
if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node))
|
* - myLocalConn needs to be already setted with an active connection
|
||||||
myLocalMode = WITNESS_MODE;
|
* - no master connection
|
||||||
else if (is_standby(myLocalConn))
|
*/
|
||||||
myLocalMode = STANDBY_MODE;
|
do
|
||||||
else /* is the master */
|
|
||||||
myLocalMode = PRIMARY_MODE;
|
|
||||||
|
|
||||||
switch (myLocalMode)
|
|
||||||
{
|
{
|
||||||
case PRIMARY_MODE:
|
|
||||||
primary_options.node = local_options.node;
|
|
||||||
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
|
|
||||||
primaryConn = myLocalConn;
|
|
||||||
|
|
||||||
checkClusterConfiguration(myLocalConn, primaryConn);
|
|
||||||
checkNodeConfiguration(local_options.conninfo);
|
|
||||||
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
|
||||||
{
|
|
||||||
PQfinish(myLocalConn);
|
|
||||||
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
|
||||||
primaryConn = myLocalConn;
|
|
||||||
update_registration();
|
|
||||||
}
|
|
||||||
|
|
||||||
log_info(_("%s Starting continuous primary connection check\n"), progname);
|
|
||||||
/* Check that primary is still alive, and standbies are sending info */
|
|
||||||
/*
|
/*
|
||||||
* Every SLEEP_MONITOR seconds, do master checks
|
* Set my server mode, establish a connection to primary
|
||||||
* XXX
|
* and start monitor
|
||||||
* Check that standbies are sending info
|
*/
|
||||||
*/
|
if (is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node))
|
||||||
for (;;)
|
myLocalMode = WITNESS_MODE;
|
||||||
|
else if (is_standby(myLocalConn))
|
||||||
|
myLocalMode = STANDBY_MODE;
|
||||||
|
else /* is the master */
|
||||||
|
myLocalMode = PRIMARY_MODE;
|
||||||
|
|
||||||
|
switch (myLocalMode)
|
||||||
{
|
{
|
||||||
if (CheckPrimaryConnection())
|
case PRIMARY_MODE:
|
||||||
{
|
primary_options.node = local_options.node;
|
||||||
/*
|
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
|
||||||
CheckActiveStandbiesConnections();
|
primaryConn = myLocalConn;
|
||||||
CheckInactiveStandbies();
|
|
||||||
*/
|
checkClusterConfiguration(myLocalConn, primaryConn);
|
||||||
sleep(SLEEP_MONITOR);
|
checkNodeConfiguration(local_options.conninfo);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* XXX
|
|
||||||
* May we do something more verbose ?
|
|
||||||
*/
|
|
||||||
exit (1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (got_SIGHUP)
|
|
||||||
{
|
|
||||||
/* if we can reload, then could need to change myLocalConn */
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
if (reload_configuration(config_file, &local_options))
|
||||||
{
|
{
|
||||||
PQfinish(myLocalConn);
|
PQfinish(myLocalConn);
|
||||||
@@ -267,70 +243,112 @@ main(int argc, char **argv)
|
|||||||
primaryConn = myLocalConn;
|
primaryConn = myLocalConn;
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
|
||||||
}
|
log_info(_("%s Starting continuous primary connection check\n"), progname);
|
||||||
}
|
|
||||||
break;
|
/* Check that primary is still alive, and standbies are sending info */
|
||||||
case WITNESS_MODE:
|
|
||||||
case STANDBY_MODE:
|
/*
|
||||||
/* I need the id of the primary as well as a connection to it */
|
* Every SLEEP_MONITOR seconds, do master checks
|
||||||
log_info(_("%s Connecting to primary for cluster '%s'\n"),
|
* XXX
|
||||||
progname, local_options.cluster_name);
|
* Check that standbies are sending info
|
||||||
primaryConn = getMasterConnection(myLocalConn, repmgr_schema,
|
*/
|
||||||
local_options.cluster_name,
|
do
|
||||||
&primary_options.node, NULL);
|
{
|
||||||
if (primaryConn == NULL)
|
if (CheckPrimaryConnection())
|
||||||
{
|
{
|
||||||
CloseConnections();
|
/*
|
||||||
exit(ERR_BAD_CONFIG);
|
CheckActiveStandbiesConnections();
|
||||||
}
|
CheckInactiveStandbies();
|
||||||
|
*/
|
||||||
|
sleep(SLEEP_MONITOR);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* XXX
|
||||||
|
* May we do something more verbose ?
|
||||||
|
*/
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (got_SIGHUP)
|
||||||
|
{
|
||||||
|
/* if we can reload, then could need to change myLocalConn */
|
||||||
|
if (reload_configuration(config_file, &local_options))
|
||||||
|
{
|
||||||
|
PQfinish(myLocalConn);
|
||||||
|
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
||||||
|
primaryConn = myLocalConn;
|
||||||
|
update_registration();
|
||||||
|
}
|
||||||
|
got_SIGHUP = false;
|
||||||
|
}
|
||||||
|
} while (!failover_done);
|
||||||
|
break;
|
||||||
|
case WITNESS_MODE:
|
||||||
|
case STANDBY_MODE:
|
||||||
|
/* I need the id of the primary as well as a connection to it */
|
||||||
|
log_info(_("%s Connecting to primary for cluster '%s'\n"),
|
||||||
|
progname, local_options.cluster_name);
|
||||||
|
primaryConn = getMasterConnection(myLocalConn, repmgr_schema,
|
||||||
|
local_options.cluster_name,
|
||||||
|
&primary_options.node, NULL);
|
||||||
|
if (primaryConn == NULL)
|
||||||
|
{
|
||||||
|
CloseConnections();
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
checkClusterConfiguration(myLocalConn, primaryConn);
|
checkClusterConfiguration(myLocalConn, primaryConn);
|
||||||
checkNodeConfiguration(local_options.conninfo);
|
checkNodeConfiguration(local_options.conninfo);
|
||||||
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
|
||||||
{
|
|
||||||
PQfinish(myLocalConn);
|
|
||||||
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
|
||||||
update_registration();
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Every SLEEP_MONITOR seconds, do checks
|
|
||||||
*/
|
|
||||||
if (myLocalMode == WITNESS_MODE)
|
|
||||||
{
|
|
||||||
log_info(_("%s Starting continuous witness node monitoring\n"), progname);
|
|
||||||
}
|
|
||||||
else if (myLocalMode == STANDBY_MODE)
|
|
||||||
{
|
|
||||||
log_info(_("%s Starting continuous standby node monitoring\n"), progname);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
if (myLocalMode == WITNESS_MODE)
|
|
||||||
WitnessMonitor();
|
|
||||||
else if (myLocalMode == STANDBY_MODE)
|
|
||||||
StandbyMonitor();
|
|
||||||
sleep(SLEEP_MONITOR);
|
|
||||||
|
|
||||||
if (got_SIGHUP)
|
|
||||||
{
|
|
||||||
/* if we can reload, then could need to change myLocalConn */
|
|
||||||
if (reload_configuration(config_file, &local_options))
|
if (reload_configuration(config_file, &local_options))
|
||||||
{
|
{
|
||||||
PQfinish(myLocalConn);
|
PQfinish(myLocalConn);
|
||||||
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
got_SIGHUP = false;
|
|
||||||
}
|
/*
|
||||||
|
* Every SLEEP_MONITOR seconds, do checks
|
||||||
|
*/
|
||||||
|
if (myLocalMode == WITNESS_MODE)
|
||||||
|
{
|
||||||
|
log_info(_("%s Starting continuous witness node monitoring\n"), progname);
|
||||||
|
}
|
||||||
|
else if (myLocalMode == STANDBY_MODE)
|
||||||
|
{
|
||||||
|
log_info(_("%s Starting continuous standby node monitoring\n"), progname);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (myLocalMode == WITNESS_MODE)
|
||||||
|
WitnessMonitor();
|
||||||
|
else if (myLocalMode == STANDBY_MODE)
|
||||||
|
StandbyMonitor();
|
||||||
|
sleep(SLEEP_MONITOR);
|
||||||
|
|
||||||
|
if (got_SIGHUP)
|
||||||
|
{
|
||||||
|
/* if we can reload, then could need to change myLocalConn */
|
||||||
|
if (reload_configuration(config_file, &local_options))
|
||||||
|
{
|
||||||
|
PQfinish(myLocalConn);
|
||||||
|
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
||||||
|
update_registration();
|
||||||
|
}
|
||||||
|
got_SIGHUP = false;
|
||||||
|
}
|
||||||
|
} while (!failover_done);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node);
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
default:
|
failover_done = false;
|
||||||
log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node);
|
|
||||||
}
|
} while (true);
|
||||||
|
|
||||||
/* Prevent a double-free */
|
/* Prevent a double-free */
|
||||||
if (primaryConn == myLocalConn)
|
if (primaryConn == myLocalConn)
|
||||||
@@ -481,6 +499,7 @@ StandbyMonitor(void)
|
|||||||
* a new primaryConn
|
* a new primaryConn
|
||||||
*/
|
*/
|
||||||
do_failover();
|
do_failover();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -901,6 +920,9 @@ do_failover(void)
|
|||||||
exit(ERR_FAILOVER_FAIL);
|
exit(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* to force it to re-calculate mode and master node */
|
||||||
|
failover_done = true;
|
||||||
|
|
||||||
/* and reconnect to the local database */
|
/* and reconnect to the local database */
|
||||||
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
myLocalConn = establishDBConnection(local_options.conninfo, true);
|
||||||
}
|
}
|
||||||
@@ -1089,6 +1111,7 @@ static void
|
|||||||
handle_sigint(SIGNAL_ARGS)
|
handle_sigint(SIGNAL_ARGS)
|
||||||
{
|
{
|
||||||
CloseConnections();
|
CloseConnections();
|
||||||
|
logger_shutdown();
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user