Improve parameter validation, logging, and error handling in repmgrd
This commit is contained in:
Greg Smith
2011-02-24 18:13:16 -05:00
parent c7b62003b2
commit f928edf897
4 changed files with 50 additions and 25 deletions

View File

@@ -30,6 +30,12 @@ parse_config(const char* config_file, t_configuration_options* options)
FILE *fp = fopen (config_file, "r"); FILE *fp = fopen (config_file, "r");
/* Initialize */
memset(options->cluster_name, 0, sizeof(options->cluster_name));
options->node = -1;
memset(options->conninfo, 0, sizeof(options->conninfo));
memset(options->rsync_options, 0, sizeof(options->rsync_options));
/* /*
* Since some commands don't require a config file at all, not * Since some commands don't require a config file at all, not
* having one isn't necessarily a problem. * having one isn't necessarily a problem.
@@ -40,12 +46,6 @@ parse_config(const char* config_file, t_configuration_options* options)
return; return;
} }
/* Initialize */
memset(options->cluster_name, 0, sizeof(options->cluster_name));
options->node = -1;
memset(options->conninfo, 0, sizeof(options->conninfo));
memset(options->rsync_options, 0, sizeof(options->rsync_options));
/* Read next line */ /* Read next line */
while ((s = fgets (buff, sizeof buff, fp)) != NULL) while ((s = fgets (buff, sizeof buff, fp)) != NULL)
{ {

View File

@@ -238,6 +238,9 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster,
} }
/* find all nodes belonging to this cluster */ /* find all nodes belonging to this cluster */
log_info(_("finding node lost for cluster '%s'\n"),
master_conninfo);
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
" WHERE cluster = '%s' and id <> %d", " WHERE cluster = '%s' and id <> %d",
schema_quoted, cluster, id); schema_quoted, cluster, id);
@@ -257,7 +260,7 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster,
/* initialize with the values of the current node being processed */ /* initialize with the values of the current node being processed */
*master_id = atoi(PQgetvalue(res1, i, 0)); *master_id = atoi(PQgetvalue(res1, i, 0));
strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO); strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO);
log_info(_("checking role of cluster '%s'\n"), log_info(_("checking role of cluster node '%s'\n"),
master_conninfo); master_conninfo);
master_conn = establishDBConnection(master_conninfo, false); master_conn = establishDBConnection(master_conninfo, false);

View File

@@ -343,6 +343,7 @@ do_master_register(void)
} }
/* Check we are a master */ /* Check we are a master */
log_info(_("%s connected to master, checking its state\n"), progname);
if (is_standby(conn)) if (is_standby(conn))
{ {
log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname); log_err(_("%s needs master to be PostgreSQL 9.0 or better\n"), progname);

View File

@@ -60,7 +60,7 @@ t_configuration_options config = {};
static void help(const char* progname); static void help(const char* progname);
static void usage(void); static void usage(void);
static void checkClusterConfiguration(void); static void checkClusterConfiguration(PGconn *conn,PGconn *primary);
static void checkNodeConfiguration(char *conninfo); static void checkNodeConfiguration(char *conninfo);
static void CancelQuery(void); static void CancelQuery(void);
@@ -147,15 +147,24 @@ main(int argc, char **argv)
if (local_options.node == -1) if (local_options.node == -1)
{ {
log_err("Node information is missing. " log_err("Node information is missing. "
"Check the configuration file.\n"); "Check the configuration file, or provide one if you have not done so.\n");
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
logger_init(progname, local_options.loglevel, local_options.logfacility); logger_init(progname, local_options.loglevel, local_options.logfacility);
if (verbose)
logger_min_verbose(LOG_INFO);
snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name); snprintf(repmgr_schema, MAXLEN, "%s%s", DEFAULT_REPMGR_SCHEMA_PREFIX, local_options.cluster_name);
printf("Establishing database connection\n");
log_info(_("%s Connecting to database '%s'\n"), progname, local_options.conninfo);
myLocalConn = establishDBConnection(local_options.conninfo, true); myLocalConn = establishDBConnection(local_options.conninfo, true);
/* should be v9 or better */ /* should be v9 or better */
log_info(_("%s connected to database, checking its state\n"), progname);
pg_version(myLocalConn, standby_version); pg_version(myLocalConn, standby_version);
if (strcmp(standby_version, "") == 0) if (strcmp(standby_version, "") == 0)
{ {
@@ -178,13 +187,19 @@ main(int argc, char **argv)
else else
{ {
/* I need the id of the primary as well as a connection to it */ /* I need the id of the primary as well as a connection to it */
log_info(_("%s Connecting to primary for cluster '%s'\n"),
progname, local_options.cluster_name);
primaryConn = getMasterConnection(myLocalConn, local_options.node, primaryConn = getMasterConnection(myLocalConn, local_options.node,
local_options.cluster_name, &primary_options.node,NULL); local_options.cluster_name,
&primary_options.node,NULL);
if (primaryConn == NULL) if (primaryConn == NULL)
{
CloseConnections();
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
}
checkClusterConfiguration(); checkClusterConfiguration(myLocalConn,primaryConn);
checkNodeConfiguration(local_options.conninfo); checkNodeConfiguration(local_options.conninfo);
if (myLocalMode == STANDBY_MODE) if (myLocalMode == STANDBY_MODE)
{ {
@@ -355,20 +370,21 @@ MonitorExecute(void)
static void static void
checkClusterConfiguration(void) checkClusterConfiguration(PGconn *conn, PGconn *primary)
{ {
PGresult *res; PGresult *res;
log_info(_("%s Checking cluster configuration with schema '%s'\n"),
progname, repmgr_schema);
sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class "
" WHERE oid = '%s.repl_nodes'::regclass", " WHERE oid = '%s.repl_nodes'::regclass",
repmgr_schema); repmgr_schema);
res = PQexec(myLocalConn, sqlquery); res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn)); log_err("PQexec failed: %s\n", PQerrorMessage(conn));
PQclear(res); PQclear(res);
PQfinish(myLocalConn); CloseConnections();
PQfinish(primaryConn);
exit(ERR_DB_QUERY); exit(ERR_DB_QUERY);
} }
@@ -383,8 +399,7 @@ checkClusterConfiguration(void)
{ {
log_err("The replication cluster is not configured\n"); log_err("The replication cluster is not configured\n");
PQclear(res); PQclear(res);
PQfinish(myLocalConn); CloseConnections();
PQfinish(primaryConn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
PQclear(res); PQclear(res);
@@ -399,17 +414,19 @@ checkNodeConfiguration(char *conninfo)
/* /*
* Check if we have my node information in repl_nodes * Check if we have my node information in repl_nodes
*/ */
log_info(_("%s Checking node %d in cluster '%s'\n"),
progname, local_options.node, local_options.cluster_name);
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
" WHERE id = %d AND cluster = '%s' ", " WHERE id = %d AND cluster = '%s' ",
repmgr_schema, local_options.node, local_options.cluster_name); repmgr_schema, local_options.node,
local_options.cluster_name);
res = PQexec(myLocalConn, sqlquery); res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK) if (PQresultStatus(res) != PGRES_TUPLES_OK)
{ {
log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn)); log_err("PQexec failed: %s\n", PQerrorMessage(myLocalConn));
PQclear(res); PQclear(res);
PQfinish(myLocalConn); CloseConnections();
PQfinish(primaryConn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
@@ -422,16 +439,19 @@ checkNodeConfiguration(char *conninfo)
PQclear(res); PQclear(res);
/* Adding the node */ /* Adding the node */
log_info(_("%s Adding node %d to cluster '%s'\n"),
progname, local_options.node, local_options.cluster_name);
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
"VALUES (%d, '%s', '%s')", "VALUES (%d, '%s', '%s')",
repmgr_schema, local_options.node, local_options.cluster_name, local_options.conninfo); repmgr_schema, local_options.node,
local_options.cluster_name,
local_options.conninfo);
if (!PQexec(primaryConn, sqlquery)) if (!PQexec(primaryConn, sqlquery))
{ {
log_err("Cannot insert node details, %s\n", log_err("Cannot insert node details, %s\n",
PQerrorMessage(primaryConn)); PQerrorMessage(primaryConn));
PQfinish(myLocalConn); CloseConnections();
PQfinish(primaryConn);
exit(ERR_BAD_CONFIG); exit(ERR_BAD_CONFIG);
} }
} }
@@ -460,6 +480,7 @@ void usage(void)
log_err(_("Try \"%s --help\" for more information.\n"), progname); log_err(_("Try \"%s --help\" for more information.\n"), progname);
} }
void help(const char *progname) void help(const char *progname)
{ {
printf(_("\n%s: Replicator manager daemon \n"), progname); printf(_("\n%s: Replicator manager daemon \n"), progname);
@@ -474,7 +495,6 @@ void help(const char *progname)
} }
#ifndef WIN32 #ifndef WIN32
static void static void
handle_sigint(SIGNAL_ARGS) handle_sigint(SIGNAL_ARGS)
@@ -482,6 +502,7 @@ handle_sigint(SIGNAL_ARGS)
CloseConnections(); CloseConnections();
} }
static void static void
setup_cancel_handler(void) setup_cancel_handler(void)
{ {