pg_indent'ing all files…

Conflicts:
	version.h
This commit is contained in:
Christian Kruse
2014-03-06 16:07:01 +01:00
committed by Jaime Casanova
parent 069f9ff2ed
commit 1c67e105ff
16 changed files with 1494 additions and 1279 deletions

View File

@@ -71,6 +71,7 @@ check_dir(char *dir)
}
#ifdef WIN32
/*
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
* released version

View File

@@ -25,7 +25,8 @@
void
parse_config(const char *config_file, t_configuration_options * options)
{
char *s, buff[MAXLINELENGTH];
char *s,
buff[MAXLINELENGTH];
char name[MAXLEN];
char value[MAXLEN];
@@ -56,12 +57,13 @@ parse_config(const char *config_file, t_configuration_options *options)
options->retry_promote_interval_secs = 300;
/*
* Since some commands don't require a config file at all, not
* having one isn't necessarily a problem.
* Since some commands don't require a config file at all, not having one
* isn't necessarily a problem.
*/
if (fp == NULL)
{
log_err(_("Did not find the configuration file '%s', continuing\n"), config_file);
log_err(_("Did not find the configuration file '%s', continuing\n"),
config_file);
return;
}
@@ -93,6 +95,7 @@ parse_config(const char *config_file, t_configuration_options *options)
else if (strcmp(name, "failover") == 0)
{
char failoverstr[MAXLEN];
strncpy(failoverstr, value, MAXLEN);
if (strcmp(failoverstr, "manual") == 0)
@@ -179,7 +182,8 @@ char *
trim(char *s)
{
/* Initialize start, end pointers */
char *s1 = s, *s2 = &s[strlen (s) - 1];
char *s1 = s,
*s2 = &s[strlen(s) - 1];
/* Trim and delimit right side */
while ((isspace(*s2)) && (s2 >= s1))
@@ -313,15 +317,15 @@ reload_configuration(char *config_file, t_configuration_options *orig_options)
orig_options->master_response_timeout = new_options.master_response_timeout;
orig_options->reconnect_attempts = new_options.reconnect_attempts;
orig_options->reconnect_intvl = new_options.reconnect_intvl;
/*
* XXX These ones can change with a simple SIGHUP?
strcpy (orig_options->loglevel, new_options.loglevel);
strcpy (orig_options->logfacility, new_options.logfacility);
logger_shutdown();
XXX do we have progname here ?
logger_init(progname, orig_options.loglevel, orig_options.logfacility);
*
* strcpy (orig_options->loglevel, new_options.loglevel); strcpy
* (orig_options->logfacility, new_options.logfacility);
*
* logger_shutdown(); XXX do we have progname here ? logger_init(progname,
* orig_options.loglevel, orig_options.logfacility);
*/
return true;

View File

@@ -53,7 +53,8 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
}
PGconn *
establishDBConnectionByParams(const char *keywords[], const char *values[],const bool exit_on_error)
establishDBConnectionByParams(const char *keywords[], const char *values[],
const bool exit_on_error)
{
/* Make a connection to the database */
PGconn *conn = PQconnectdbParams(keywords, values, true);
@@ -124,6 +125,7 @@ bool
is_pgup(PGconn *conn, int timeout)
{
char sqlquery[QUERY_STR_LEN];
/* Check the connection status twice in case it changes after reset */
bool twice = false;
@@ -160,7 +162,11 @@ is_pgup(PGconn *conn, int timeout)
break;
failed:
/* we need to retry, because we might just have lost the connection once */
/*
* we need to retry, because we might just have lost the
* connection once
*/
if (twice)
return false;
PQreset(conn); /* reconnect */
@@ -378,8 +384,8 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
/*
* Can't use the is_standby() function here because on error that
* function closes the connection passed and exits. This still
* needs to close master_conn first.
* function closes the connection passed and exits. This still needs
* to close master_conn first.
*/
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
@@ -408,14 +414,13 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
}
}
/* If we finish this loop without finding a master then
* we don't have the info or the master has failed (or we
* reached max_connections or superuser_reserved_connections,
* anything else I'm missing?).
/*
* If we finish this loop without finding a master then we don't have
* the info or the master has failed (or we reached max_connections or
* superuser_reserved_connections, anything else I'm missing?).
*
* Probably we will need to check the error to know if we need
* to start failover procedure or just fix some situation on the
* standby.
* Probably we will need to check the error to know if we need to start
* failover procedure or just fix some situation on the standby.
*/
PQclear(res1);
return NULL;
@@ -423,8 +428,8 @@ getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
/*
* wait until current query finishes ignoring any results, this could be an async command
* or a cancelation of a query
* wait until current query finishes ignoring any results, this could be an
* async command or a cancelation of a query
* return 1 if Ok; 0 if any error occurred; -1 if timeout reached
*/
int
@@ -433,7 +438,9 @@ wait_connection_availability(PGconn *conn, long long timeout)
PGresult *res;
fd_set read_set;
int sock = PQsocket(conn);
struct timeval tmout, before, after;
struct timeval tmout,
before,
after;
struct timezone tz;
/* recalc to microseconds */
@@ -450,7 +457,8 @@ wait_connection_availability(PGconn *conn, long long timeout)
if (PQisBusy(conn) == 0)
{
do {
do
{
res = PQgetResult(conn);
PQclear(res);
} while (res != NULL);
@@ -505,9 +513,8 @@ CancelQuery(PGconn *conn, int timeout)
return false;
/*
* PQcancel can only return 0 if socket()/connect()/send()
* fails, in any of those cases we can assume something
* bad happened to the connection
* PQcancel can only return 0 if socket()/connect()/send() fails, in any
* of those cases we can assume something bad happened to the connection
*/
if (PQcancel(pgcancel, errbuf, ERRBUFF_SIZE) == 0)
{

View File

@@ -41,4 +41,5 @@ PGconn *getMasterConnection(PGconn *standby_conn, char *schema, char *cluster,
int wait_connection_availability(PGconn *conn, long long timeout);
bool CancelQuery(PGconn *conn, int timeout);
#endif

27
log.c
View File

@@ -39,7 +39,9 @@
/* #define REPMGR_DEBUG */
void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) {
void
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
{
size_t len = strlen(fmt);
char fmt1[len + 150];
time_t t;
@@ -47,7 +49,8 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
char buff[100];
va_list ap;
if(log_level >= level) {
if (log_level >= level)
{
time(&t);
tm = localtime(&t);
@@ -70,9 +73,9 @@ static int detect_log_facility(const char* facility);
int log_type = REPMGR_STDERR;
int log_level = LOG_NOTICE;
bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility)
bool
logger_init(t_configuration_options * opts, const char *ident, const char *level, const char *facility)
{
int l;
int f;
@@ -139,12 +142,12 @@ bool logger_init(t_configuration_options *opts, const char* ident, const char* l
stderr_log_notice(_("Setup syslog (level: %s, facility: %s)\n"), level, facility);
}
#endif
if (*opts->logfile)
{
FILE *fd;
fd = freopen(opts->logfile, "a", stderr);
if (fd == NULL)
@@ -158,9 +161,9 @@ bool logger_init(t_configuration_options *opts, const char* ident, const char* l
}
bool logger_shutdown(void)
bool
logger_shutdown(void)
{
#ifdef HAVE_SYSLOG
if (log_type == REPMGR_SYSLOG)
closelog();
@@ -174,13 +177,15 @@ bool logger_shutdown(void)
* options, which might increase requested logging over what's specified
* in the regular configuration file.
*/
void logger_min_verbose(int minimum)
void
logger_min_verbose(int minimum)
{
if (log_level < minimum)
log_level = minimum;
}
int detect_log_level(const char* level)
int
detect_log_level(const char *level)
{
if (!strcmp(level, "DEBUG"))
return LOG_DEBUG;
@@ -202,9 +207,11 @@ int detect_log_level(const char* level)
return 0;
}
int detect_log_facility(const char* facility)
int
detect_log_facility(const char *facility)
{
int local = 0;
if (!strncmp(facility, "LOCAL", 5) && strlen(facility) == 6)
{

11
log.h
View File

@@ -25,7 +25,9 @@
#define REPMGR_SYSLOG 1
#define REPMGR_STDERR 2
void stderr_log_with_level(const char *level_name, int level, const char *fmt, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
void
stderr_log_with_level(const char *level_name, int level, const char *fmt,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
/* Standard error logging */
#define stderr_log_debug(...) stderr_log_with_level("DEBUG", LOG_DEBUG, __VA_ARGS__)
@@ -88,7 +90,6 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
if (log_type == REPMGR_SYSLOG) syslog(LOG_ALERT, __VA_ARGS__); \
else stderr_log_alert(__VA_ARGS__); \
}
#else
#define LOG_EMERG 0 /* system is unusable */
@@ -108,13 +109,15 @@ void stderr_log_with_level(const char *level_name, int level, const char *fmt, .
#define log_crit(...) stderr_log_crit(__VA_ARGS__)
#define log_alert(...) stderr_log_alert(__VA_ARGS__)
#define log_emerg(...) stderr_log_emerg(__VA_ARGS__)
#endif
/* Logger initialisation and shutdown */
bool logger_shutdown(void);
bool logger_init(t_configuration_options *opts, const char* ident, const char* level, const char* facility);
bool logger_init(t_configuration_options * opts, const char *ident,
const char *level, const char *facility);
void logger_min_verbose(int minimum);
extern int log_type;

432
repmgr.c

File diff suppressed because it is too large Load Diff

301
repmgrd.c
View File

@@ -57,7 +57,6 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0};
#define XLByteLT(a, b) \
(a < b)
#else
#define XLAssign(a, b) \
a.xlogid = b.xlogid; \
@@ -148,7 +147,8 @@ static void do_daemonize(void);
static void check_and_create_pid_file(const char *pid_file);
static void
CloseConnections() {
CloseConnections()
{
if (primaryConn != NULL && PQisBusy(primaryConn) == 1)
CancelQuery(primaryConn, local_options.master_response_timeout);
@@ -177,11 +177,13 @@ main(int argc, char **argv)
};
int optindex;
int c, ret;
int c,
ret;
bool daemonize = false;
FILE *fd;
char standby_version[MAXVERSIONSTR], *ret_ver;
char standby_version[MAXVERSIONSTR],
*ret_ver;
progname = get_progname(argv[0]);
@@ -263,7 +265,8 @@ main(int argc, char **argv)
strerror(errno));
}
logger_init(&local_options, progname, local_options.loglevel, local_options.logfacility);
logger_init(&local_options, progname, local_options.loglevel,
local_options.logfacility);
if (verbose)
logger_min_verbose(LOG_INFO);
@@ -278,9 +281,11 @@ main(int argc, char **argv)
}
}
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX,
local_options.cluster_name);
log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo);
log_info(_("%s Connecting to database '%s'\n"), progname,
local_options.conninfo);
myLocalConn = establishDBConnection(local_options.conninfo, true);
/* should be v9 or better */
@@ -289,25 +294,25 @@ main(int argc, char **argv)
if (ret_ver == NULL || strcmp(standby_version, "") == 0)
{
if (ret_ver != NULL)
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
log_err(_("%s needs standby to be PostgreSQL 9.0 or better\n"),
progname);
terminate(ERR_BAD_CONFIG);
}
/*
* MAIN LOOP
* This loop cycles once per failover and at startup
* Requisites:
* - myLocalConn needs to be already set with an active connection
* - no master connection
* MAIN LOOP This loop cycles once per failover and at startup
* Requisites: - myLocalConn needs to be already set with an active
* connection - no master connection
*/
do
{
/*
* Set my server mode, establish a connection to primary
* and start monitor
* Set my server mode, establish a connection to primary and start
* monitor
*/
ret = is_witness(myLocalConn, repmgr_schema, local_options.cluster_name, local_options.node);
ret = is_witness(myLocalConn, repmgr_schema,
local_options.cluster_name, local_options.node);
if (ret == 1)
myLocalMode = WITNESS_MODE;
@@ -321,9 +326,11 @@ main(int argc, char **argv)
myLocalMode = PRIMARY_MODE;
}
/* XXX we did this before changing is_standby() to return int; we
/*
* XXX we did this before changing is_standby() to return int; we
* should not exit at this point, but for now we do until we have a
* better strategy */
* better strategy
*/
if (ret == -1)
terminate(1);
@@ -331,7 +338,8 @@ main(int argc, char **argv)
{
case PRIMARY_MODE:
primary_options.node = local_options.node;
strncpy(primary_options.conninfo, local_options.conninfo, MAXLEN);
strncpy(primary_options.conninfo, local_options.conninfo,
MAXLEN);
primaryConn = myLocalConn;
checkClusterConfiguration(myLocalConn);
@@ -345,36 +353,42 @@ main(int argc, char **argv)
update_registration();
}
log_info(_("%s Starting continuous primary connection check\n"), progname);
/* Check that primary is still alive, and standbies are sending info */
log_info(_("%s Starting continuous primary connection check\n"),
progname);
/*
* Every local_options.monitor_interval_secs seconds, do master checks
* XXX
* Check that standbies are sending info
* Check that primary is still alive, and standbies are
* sending info
*/
/*
* Every local_options.monitor_interval_secs seconds, do
* master checks XXX Check that standbies are sending info
*/
do
{
if (CheckConnection(primaryConn, "master"))
{
/*
CheckActiveStandbiesConnections();
CheckInactiveStandbies();
* CheckActiveStandbiesConnections();
* CheckInactiveStandbies();
*/
sleep(local_options.monitor_interval_secs);
}
else
{
/* XXX
* May we do something more verbose ?
/*
* XXX May we do something more verbose ?
*/
terminate(1);
}
if (got_SIGHUP)
{
/* if we can reload, then could need to change myLocalConn */
/*
* if we can reload, then could need to change
* myLocalConn
*/
if (reload_configuration(config_file, &local_options))
{
PQfinish(myLocalConn);
@@ -384,6 +398,7 @@ main(int argc, char **argv)
if (*local_options.logfile)
{
FILE *fd;
fd = freopen(local_options.logfile, "a", stderr);
if (fd == NULL)
{
@@ -423,15 +438,18 @@ main(int argc, char **argv)
}
/*
* Every local_options.monitor_interval_secs seconds, do checks
* Every local_options.monitor_interval_secs seconds, do
* checks
*/
if (myLocalMode == WITNESS_MODE)
{
log_info(_("%s Starting continuous witness node monitoring\n"), progname);
log_info(_("%s Starting continuous witness node monitoring\n"),
progname);
}
else if (myLocalMode == STANDBY_MODE)
{
log_info(_("%s Starting continuous standby node monitoring\n"), progname);
log_info(_("%s Starting continuous standby node monitoring\n"),
progname);
}
do
@@ -444,7 +462,10 @@ main(int argc, char **argv)
if (got_SIGHUP)
{
/* if we can reload, then could need to change myLocalConn */
/*
* if we can reload, then could need to change
* myLocalConn
*/
if (reload_configuration(config_file, &local_options))
{
PQfinish(myLocalConn);
@@ -456,7 +477,8 @@ main(int argc, char **argv)
} while (!failover_done);
break;
default:
log_err(_("%s: Unrecognized mode for node %d\n"), progname, local_options.node);
log_err(_("%s: Unrecognized mode for node %d\n"), progname,
local_options.node);
}
failover_done = false;
@@ -485,13 +507,17 @@ WitnessMonitor(void)
* Check if the master is still available, if after 5 minutes of retries
* we cannot reconnect, return false.
*/
CheckConnection(primaryConn, "master"); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
CheckConnection(primaryConn, "master"); /* this take up to
* local_options.reconnect_atte
* mpts *
* local_options.reconnect_intv
* l seconds */
if (PQstatus(primaryConn) != CONNECTION_OK)
{
/*
* If we can't reconnect, just exit...
* XXX we need to make witness connect to the new master
* If we can't reconnect, just exit... XXX we need to make witness
* connect to the new master
*/
terminate(0);
}
@@ -501,12 +527,13 @@ WitnessMonitor(void)
return;
/*
* Cancel any query that is still being executed,
* so i can insert the current record
* Cancel any query that is still being executed, so i can insert the
* current record
*/
if (!CancelQuery(primaryConn, local_options.master_response_timeout))
return;
if (wait_connection_availability(primaryConn, local_options.master_response_timeout) != 1)
if (wait_connection_availability(primaryConn,
local_options.master_response_timeout) != 1)
return;
/* Get local xlog info */
@@ -532,11 +559,12 @@ WitnessMonitor(void)
"VALUES(%d, %d, '%s'::timestamp with time zone, "
" pg_current_xlog_location(), null, "
" 0, 0)",
repmgr_schema, primary_options.node, local_options.node, monitor_witness_timestamp);
repmgr_schema, primary_options.node, local_options.node,
monitor_witness_timestamp);
/*
* Execute the query asynchronously, but don't check for a result. We
* will check the result next time we pause for a monitor step.
* Execute the query asynchronously, but don't check for a result. We will
* check the result next time we pause for a monitor step.
*/
log_debug("WitnessMonitor: %s\n", sqlquery);
if (PQsendQuery(primaryConn, sqlquery) == 0)
@@ -564,14 +592,19 @@ StandbyMonitor(void)
unsigned long long int lsn_standby_received;
unsigned long long int lsn_standby_applied;
int connection_retries, ret;
int connection_retries,
ret;
bool did_retry = false;
/*
* Check if the master is still available, if after 5 minutes of retries
* we cannot reconnect, try to get a new master.
*/
CheckConnection(primaryConn, "master"); /* this take up to local_options.reconnect_attempts * local_options.reconnect_intvl seconds */
CheckConnection(primaryConn, "master"); /* this take up to
* local_options.reconnect_atte
* mpts *
* local_options.reconnect_intv
* l seconds */
if (!CheckConnection(myLocalConn, "standby"))
{
@@ -593,16 +626,24 @@ StandbyMonitor(void)
local_options.cluster_name, &primary_options.node, NULL);
if (PQstatus(primaryConn) == CONNECTION_OK)
{
/* Connected, we can continue the process so break the loop */
log_err(_("Connected to node %d, continue monitoring.\n"), primary_options.node);
/*
* Connected, we can continue the process so break the
* loop
*/
log_err(_("Connected to node %d, continue monitoring.\n"),
primary_options.node);
break;
}
else
{
log_err(_("We haven't found a new master, waiting before retry...\n"));
/* wait local_options.retry_promote_interval_secs minutes before retries,
* after 6 failures (6 * local_options.monitor_interval_secs
* seconds) we stop trying */
/*
* wait local_options.retry_promote_interval_secs minutes
* before retries, after 6 failures (6 *
* local_options.monitor_interval_secs seconds) we stop
* trying
*/
sleep(local_options.retry_promote_interval_secs);
}
}
@@ -616,8 +657,8 @@ StandbyMonitor(void)
else if (local_options.failover == AUTOMATIC_FAILOVER)
{
/*
* When we return from this function we will have a new primary and
* a new primaryConn
* When we return from this function we will have a new primary
* and a new primaryConn
*/
do_failover();
return;
@@ -625,7 +666,8 @@ StandbyMonitor(void)
}
/* Check if we still are a standby, we could have been promoted */
do {
do
{
ret = is_standby(myLocalConn);
switch (ret)
@@ -658,8 +700,8 @@ StandbyMonitor(void)
return;
/*
* Cancel any query that is still being executed,
* so i can insert the current record
* Cancel any query that is still being executed, so i can insert the
* current record
*/
if (!CancelQuery(primaryConn, local_options.master_response_timeout))
return;
@@ -722,8 +764,8 @@ StandbyMonitor(void)
(lsn_standby_received - lsn_standby_applied));
/*
* Execute the query asynchronously, but don't check for a result. We
* will check the result next time we pause for a monitor step.
* Execute the query asynchronously, but don't check for a result. We will
* check the result next time we pause for a monitor step.
*/
log_debug("StandbyMonitor: %s\n", sqlquery);
if (PQsendQuery(primaryConn, sqlquery) == 0)
@@ -756,8 +798,8 @@ do_failover(void)
PGconn *nodeConn = NULL;
/*
* will get info about up to 50 nodes,
* which seems to be large enough for most scenarios
* will get info about up to 50 nodes, which seems to be large enough for
* most scenarios
*/
nodeInfo nodes[50];
@@ -785,21 +827,28 @@ do_failover(void)
total_nodes = PQntuples(res);
log_debug(_("%s: there are %d nodes registered\n"), progname, total_nodes);
/* Build an array with the nodes and indicate which ones are visible and ready */
/*
* Build an array with the nodes and indicate which ones are visible and
* ready
*/
for (i = 0; i < total_nodes; i++)
{
nodes[i].nodeId = atoi(PQgetvalue(res, i, 0));
strncpy(nodes[i].conninfostr, PQgetvalue(res, i, 1), MAXLEN);
nodes[i].is_witness = (strcmp(PQgetvalue(res, i, 2), "t") == 0) ? true : false;
/* Initialize on false so if we can't reach this node we know that later */
/*
* Initialize on false so if we can't reach this node we know that
* later
*/
nodes[i].is_visible = false;
nodes[i].is_ready = false;
XLAssignValue(nodes[i].xlog_location, 0, 0);
log_debug(_("%s: node=%d conninfo=\"%s\" witness=%s\n"),
progname, nodes[i].nodeId, nodes[i].conninfostr, (nodes[i].is_witness) ? "true" : "false");
progname, nodes[i].nodeId, nodes[i].conninfostr,
(nodes[i].is_witness) ? "true" : "false");
nodeConn = establishDBConnection(nodes[i].conninfostr, false);
@@ -819,11 +868,12 @@ do_failover(void)
}
PQclear(res);
log_debug(_("Total nodes counted: registered=%d, visible=%d\n"), total_nodes, visible_nodes);
log_debug(_("Total nodes counted: registered=%d, visible=%d\n"),
total_nodes, visible_nodes);
/*
* am i on the group that should keep alive?
* if i see less than half of total_nodes then i should do nothing
* am i on the group that should keep alive? if i see less than half of
* total_nodes then i should do nothing
*/
if (visible_nodes < (total_nodes / 2.0))
{
@@ -844,9 +894,11 @@ do_failover(void)
continue;
nodeConn = establishDBConnection(nodes[i].conninfostr, false);
/* XXX
* This shouldn't happen, if this happens it means this is a major problem
* maybe network outages? anyway, is better for a human to react
/*
* XXX This shouldn't happen, if this happens it means this is a major
* problem maybe network outages? anyway, is better for a human to
* react
*/
if (PQstatus(nodeConn) != CONNECTION_OK)
{
@@ -861,7 +913,8 @@ do_failover(void)
res = PQexec(nodeConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_info(_("Can't get node's last standby location: %s\n"), PQerrorMessage(nodeConn));
log_info(_("Can't get node's last standby location: %s\n"),
PQerrorMessage(nodeConn));
log_info(_("Connection details: %s\n"), nodes[i].conninfostr);
PQclear(res);
PQfinish(nodeConn);
@@ -869,7 +922,8 @@ do_failover(void)
}
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0));
log_info(_("could not parse transaction log location \"%s\"\n"),
PQgetvalue(res, 0, 0));
log_debug("XLog position of node %d: log id=%u (%X), offset=%u (%X)\n",
nodes[i].nodeId, uxlogid, uxlogid, uxrecoff, uxrecoff);
@@ -894,7 +948,9 @@ do_failover(void)
res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(myLocalConn));
log_err(_("PQexec failed: %s.\nReport an invalid value to not be "
" considered as new primary and exit.\n"),
PQerrorMessage(myLocalConn));
PQclear(res);
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
update_shared_memory(last_wal_standby_applied);
@@ -910,8 +966,8 @@ do_failover(void)
while (!nodes[i].is_ready)
{
/*
* the witness will always be masked as ready if it's still
* not marked that way and avoid a useless query
* the witness will always be masked as ready if it's still not
* marked that way and avoid a useless query
*/
if (nodes[i].is_witness)
{
@@ -932,25 +988,32 @@ do_failover(void)
break;
nodeConn = establishDBConnection(nodes[i].conninfostr, false);
/* XXX
* This shouldn't happen, if this happens it means this is a major problem
* maybe network outages? anyway, is better for a human to react
/*
* XXX This shouldn't happen, if this happens it means this is a
* major problem maybe network outages? anyway, is better for a
* human to react
*/
if (PQstatus(nodeConn) != CONNECTION_OK)
{
/* XXX */
log_info(_("At this point, it could be some race conditions that are acceptable, assume the node is restarting and starting failover procedure\n"));
log_info(_("At this point, it could be some race conditions "
"that are acceptable, assume the node is restarting "
"and starting failover procedure\n"));
break;
}
uxlogid = 0;
uxrecoff = 0;
sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()", repmgr_schema);
sqlquery_snprintf(sqlquery, "SELECT %s.repmgr_get_last_standby_location()",
repmgr_schema);
res = PQexec(nodeConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_err(_("PQexec failed: %s.\nReport an invalid value to not be considered as new primary and exit.\n"), PQerrorMessage(nodeConn));
log_err(_("PQexec failed: %s.\nReport an invalid value to not"
"be considered as new primary and exit.\n"),
PQerrorMessage(nodeConn));
PQclear(res);
PQfinish(nodeConn);
terminate(ERR_DB_QUERY);
@@ -958,7 +1021,8 @@ do_failover(void)
if (sscanf(PQgetvalue(res, 0, 0), "%X/%X", &uxlogid, &uxrecoff) != 2)
{
log_info(_("could not parse transaction log location \"%s\"\n"), PQgetvalue(res, 0, 0));
log_info(_("could not parse transaction log location \"%s\"\n"),
PQgetvalue(res, 0, 0));
/* we can't do anything but fail at this point... */
if (*PQgetvalue(res, 0, 0) == '\0')
@@ -1009,7 +1073,10 @@ do_failover(void)
if (!find_best)
{
/* start with the first ready node, and then move on to the next one */
/*
* start with the first ready node, and then move on to the next
* one
*/
best_candidate.nodeId = nodes[i].nodeId;
XLAssign(best_candidate.xlog_location, nodes[i].xlog_location);
best_candidate.is_ready = nodes[i].is_ready;
@@ -1018,10 +1085,11 @@ do_failover(void)
}
/* we use the macros provided by xlogdefs.h to compare XLogRecPtr */
/*
* Nodes are retrieved ordered by priority, so if the current
* best candidate is lower than the next node's wal location
* then assign next node as the new best candidate.
* Nodes are retrieved ordered by priority, so if the current best
* candidate is lower than the next node's wal location then assign
* next node as the new best candidate.
*/
if (XLByteLT(best_candidate.xlog_location, nodes[i].xlog_location))
{
@@ -1037,7 +1105,8 @@ do_failover(void)
{
if (best_candidate.is_witness)
{
log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"), progname);
log_err(_("%s: Node selected as new master is a witness. Can't be promoted.\n"),
progname);
terminate(ERR_FAILOVER_FAIL);
}
@@ -1047,7 +1116,8 @@ do_failover(void)
if (verbose)
log_info(_("%s: This node is the best candidate to be the new primary, promoting...\n"),
progname);
log_debug(_("promote command is: \"%s\"\n"), local_options.promote_command);
log_debug(_("promote command is: \"%s\"\n"),
local_options.promote_command);
if (log_type == REPMGR_STDERR && *local_options.logfile)
{
@@ -1057,7 +1127,8 @@ do_failover(void)
r = system(local_options.promote_command);
if (r != 0)
{
log_err(_("%s: promote command failed. You could check and try it manually.\n"), progname);
log_err(_("%s: promote command failed. You could check and try it manually.\n"),
progname);
terminate(ERR_BAD_CONFIG);
}
}
@@ -1070,9 +1141,10 @@ do_failover(void)
log_info(_("%s: Node %d is the best candidate to be the new primary, we should follow it...\n"),
progname, best_candidate.nodeId);
log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
/*
* New Primary need some time to be promoted.
* The follow command should take care of that.
* New Primary need some time to be promoted. The follow command
* should take care of that.
*/
if (log_type == REPMGR_STDERR && *local_options.logfile)
{
@@ -1082,13 +1154,15 @@ do_failover(void)
r = system(local_options.follow_command);
if (r != 0)
{
log_err(_("%s: follow command failed. You could check and try it manually.\n"), progname);
log_err(_("%s: follow command failed. You could check and try it manually.\n"),
progname);
terminate(ERR_BAD_CONFIG);
}
}
else
{
log_err(_("%s: Did not find candidates. You should check and try manually.\n"), progname);
log_err(_("%s: Did not find candidates. You should check and try manually.\n"),
progname);
terminate(ERR_FAILOVER_FAIL);
}
@@ -1106,10 +1180,9 @@ CheckConnection(PGconn *conn, const char *type)
int connection_retries;
/*
* Check if the master is still available
* if after local_options.reconnect_attempts * local_options.reconnect_intvl seconds of retries
* we cannot reconnect
* return false
* Check if the master is still available if after
* local_options.reconnect_attempts * local_options.reconnect_intvl
* seconds of retries we cannot reconnect return false
*/
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
{
@@ -1126,14 +1199,16 @@ CheckConnection(PGconn *conn, const char *type)
{
if (connection_retries > 0)
{
log_info(_("%s: Connection to %s has been restored.\n"), progname, type);
log_info(_("%s: Connection to %s has been restored.\n"),
progname, type);
}
return true;
}
}
if (!is_pgup(conn, local_options.master_response_timeout))
{
log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"), progname);
log_err(_("%s: We couldn't reconnect for long enough, exiting...\n"),
progname);
/* XXX Anything else to do here? */
return false;
}
@@ -1200,8 +1275,8 @@ checkNodeConfiguration(void)
}
/*
* If there isn't any results then we have not configured this node yet
* in repmgr, if that is the case we will insert the node to the cluster,
* If there isn't any results then we have not configured this node yet in
* repmgr, if that is the case we will insert the node to the cluster,
* except if it is a witness
*/
if (PQntuples(res) == 0)
@@ -1253,14 +1328,16 @@ walLocationToBytes(char *wal_location)
}
void usage(void)
void
usage(void)
{
log_err(_("%s: Replicator manager daemon \n"), progname);
log_err(_("Try \"%s --help\" for more information.\n"), progname);
}
void help(const char *progname)
void
help(const char *progname)
{
printf(_("Usage: %s [OPTIONS]\n"), progname);
printf(_("Replicator manager daemon for PostgreSQL.\n"));
@@ -1328,7 +1405,8 @@ update_shared_memory(char *last_wal_standby_applied)
res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
log_warning(_("Cannot update this standby's shared memory: %s\n"), PQerrorMessage(myLocalConn));
log_warning(_("Cannot update this standby's shared memory: %s\n"),
PQerrorMessage(myLocalConn));
/* XXX is this enough reason to terminate this repmgrd? */
}
else if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
@@ -1350,12 +1428,14 @@ update_registration(void)
" SET conninfo = '%s', "
" priority = %d "
" WHERE id = %d",
repmgr_schema, local_options.conninfo, local_options.priority, local_options.node);
repmgr_schema, local_options.conninfo,
local_options.priority, local_options.node);
res = PQexec(primaryConn, sqlquery);
if (PQresultStatus(res) != PGRES_COMMAND_OK)
{
log_err(_("Cannot update registration: %s\n"), PQerrorMessage(primaryConn));
log_err(_("Cannot update registration: %s\n"),
PQerrorMessage(primaryConn));
terminate(ERR_DB_CON);
}
PQclear(res);
@@ -1364,7 +1444,8 @@ update_registration(void)
static void
do_daemonize()
{
char *ptr, path[MAXLEN];
char *ptr,
path[MAXLEN];
pid_t pid = fork();
int ret;
@@ -1446,7 +1527,9 @@ check_and_create_pid_file(const char *pid_file)
if (fd == NULL)
{
log_err("PID file %s exists but could not opened for reading. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
log_err("PID file %s exists but could not opened for reading. "
"If repmgrd is no longer alive remove the file and restart repmgrd.\n",
pid_file);
exit(ERR_BAD_CONFIG);
}
@@ -1466,7 +1549,9 @@ check_and_create_pid_file(const char *pid_file)
{
if (kill(pid, 0) != -1)
{
log_err("PID file %s exists and seems to contain a valid PID. If repmgrd is no longer alive remove the file and restart repmgrd.\n", pid_file);
log_err("PID file %s exists and seems to contain a valid PID. "
"If repmgrd is no longer alive remove the file and restart repmgrd.\n",
pid_file);
exit(ERR_BAD_CONFIG);
}
}

View File

@@ -68,9 +68,9 @@ _PG_init(void)
* In order to create our shared memory area, we have to be loaded via
* shared_preload_libraries. If not, fall out without hooking into any of
* the main system. (We don't throw error here because it seems useful to
* allow the repmgr functions to be created even when the
* module isn't active. The functions must protect themselves against
* being called then, however.)
* allow the repmgr functions to be created even when the module isn't
* active. The functions must protect themselves against being called
* then, however.)
*/
if (!process_shared_preload_libraries_in_progress)
return;

View File

@@ -25,7 +25,9 @@
#include "log.h"
#include "strutil.h"
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 0)));
static int
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
/* Add strnlen on platforms that don't have it, like OS X */
#ifndef strnlen
@@ -33,6 +35,7 @@ size_t
strnlen(const char *s, size_t n)
{
const char *end = (const char *) memchr(s, '\0', n);
return (end ? end - s : n);
}
#endif
@@ -83,7 +86,8 @@ sqlquery_snprintf(char *str, const char *format, ...)
}
int maxlen_snprintf(char *str, const char *format, ...)
int
maxlen_snprintf(char *str, const char *format,...)
{
va_list arglist;
int retval;

View File

@@ -31,9 +31,17 @@
#define MAXCONNINFO 1024
extern int xsnprintf(char *str, size_t size, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 3, 4)));
extern int sqlquery_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
extern int maxlen_snprintf(char *str, const char *format, ...) __attribute__ ((format (PG_PRINTF_ATTRIBUTE, 2, 3)));
extern int
xsnprintf(char *str, size_t size, const char *format,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
extern int
sqlquery_snprintf(char *str, const char *format,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
extern int
maxlen_snprintf(char *str, const char *format,...)
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
/* Add strnlen on platforms that don't have it, like OS X */
#ifndef strnlen

View File

@@ -2,4 +2,5 @@
#define _VERSION_H_
#define REPMGR_VERSION "2.0"
#endif