mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Attack of whitespace pedantry
pgsql conventions (tabs, four spaces wide, etc.) were applied throughout. Also tried to fix some very small capitalization errors, auto-fill problems, and some inter-block vertical whitespace issues. Long strings in repmgr.c were left intact, though: they are rather numerous and are less of a problem than small pieces of function calls and comments wrapping over a line; the latter kind of problem has been mostly fixed. Signed-off-by: Dan Farina <drfarina@acm.org> Signed-off-by: Peter van Hardenberg <pvh@heroku.com>
This commit is contained in:
committed by
Peter van Hardenberg
parent
56c65acd99
commit
af2edf10a0
80
check_dir.c
80
check_dir.c
@@ -31,63 +31,63 @@ static int mkdir_p(char *path, mode_t omode);
|
||||
int
|
||||
check_dir(char *dir)
|
||||
{
|
||||
DIR *chkdir;
|
||||
struct dirent *file;
|
||||
int result = 1;
|
||||
DIR *chkdir;
|
||||
struct dirent *file;
|
||||
int result = 1;
|
||||
|
||||
errno = 0;
|
||||
errno = 0;
|
||||
|
||||
chkdir = opendir(dir);
|
||||
chkdir = opendir(dir);
|
||||
|
||||
if (!chkdir)
|
||||
return (errno == ENOENT) ? 0 : -1;
|
||||
if (!chkdir)
|
||||
return (errno == ENOENT) ? 0 : -1;
|
||||
|
||||
while ((file = readdir(chkdir)) != NULL)
|
||||
{
|
||||
if (strcmp(".", file->d_name) == 0 ||
|
||||
strcmp("..", file->d_name) == 0)
|
||||
{
|
||||
/* skip this and parent directory */
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = 2; /* not empty */
|
||||
break;
|
||||
}
|
||||
}
|
||||
while ((file = readdir(chkdir)) != NULL)
|
||||
{
|
||||
if (strcmp(".", file->d_name) == 0 ||
|
||||
strcmp("..", file->d_name) == 0)
|
||||
{
|
||||
/* skip this and parent directory */
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
result = 2; /* not empty */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
/*
|
||||
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
|
||||
* released version
|
||||
*/
|
||||
if (GetLastError() == ERROR_NO_MORE_FILES)
|
||||
errno = 0;
|
||||
/*
|
||||
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
|
||||
* released version
|
||||
*/
|
||||
if (GetLastError() == ERROR_NO_MORE_FILES)
|
||||
errno = 0;
|
||||
#endif
|
||||
|
||||
closedir(chkdir);
|
||||
closedir(chkdir);
|
||||
|
||||
if (errno != 0)
|
||||
return -1; /* some kind of I/O error? */
|
||||
if (errno != 0)
|
||||
return -1; /* some kind of I/O error? */
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Create directory
|
||||
* Create directory
|
||||
*/
|
||||
bool
|
||||
create_directory(char *dir)
|
||||
{
|
||||
if (mkdir_p(dir, 0700) == 0)
|
||||
return true;
|
||||
if (mkdir_p(dir, 0700) == 0)
|
||||
return true;
|
||||
|
||||
fprintf(stderr, _("Could not create directory \"%s\": %s\n"),
|
||||
dir, strerror(errno));
|
||||
fprintf(stderr, _("Could not create directory \"%s\": %s\n"),
|
||||
dir, strerror(errno));
|
||||
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -114,10 +114,10 @@ mkdir_p(char *path, mode_t omode)
|
||||
{
|
||||
struct stat sb;
|
||||
mode_t numask,
|
||||
oumask;
|
||||
oumask;
|
||||
int first,
|
||||
last,
|
||||
retval;
|
||||
last,
|
||||
retval;
|
||||
char *p;
|
||||
|
||||
p = path;
|
||||
@@ -212,5 +212,5 @@ is_pg_dir(char *dir)
|
||||
|
||||
sprintf(path, "%s/PG_VERSION", dir);
|
||||
|
||||
return (stat(path, &sb) == 0) ? true : false;
|
||||
return (stat(path, &sb) == 0) ? true : false;
|
||||
}
|
||||
|
||||
66
config.c
66
config.c
@@ -8,13 +8,14 @@
|
||||
#include "repmgr.h"
|
||||
|
||||
void
|
||||
parse_config(const char *config_file, char *cluster_name, int *node, char *conninfo)
|
||||
parse_config(const char *config_file, char *cluster_name, int *node,
|
||||
char *conninfo)
|
||||
{
|
||||
char *s, buff[256];
|
||||
FILE *fp = fopen (config_file, "r");
|
||||
|
||||
if (fp == NULL)
|
||||
return;
|
||||
return;
|
||||
|
||||
/* Read next line */
|
||||
while ((s = fgets (buff, sizeof buff, fp)) != NULL)
|
||||
@@ -22,46 +23,47 @@ parse_config(const char *config_file, char *cluster_name, int *node, char *conni
|
||||
char name[MAXLEN];
|
||||
char value[MAXLEN];
|
||||
|
||||
/* Skip blank lines and comments */
|
||||
if (buff[0] == '\n' || buff[0] == '#')
|
||||
continue;
|
||||
/* Skip blank lines and comments */
|
||||
if (buff[0] == '\n' || buff[0] == '#')
|
||||
continue;
|
||||
|
||||
/* Parse name/value pair from line */
|
||||
/* Parse name/value pair from line */
|
||||
parse_line(buff, name, value);
|
||||
|
||||
/* Copy into correct entry in parameters struct */
|
||||
if (strcmp(name, "cluster") == 0)
|
||||
strncpy (cluster_name, value, MAXLEN);
|
||||
else if (strcmp(name, "node") == 0)
|
||||
*node = atoi(value);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy (conninfo, value, MAXLEN);
|
||||
else
|
||||
printf ("WARNING: %s/%s: Unknown name/value pair!\n", name, value);
|
||||
}
|
||||
/* Copy into correct entry in parameters struct */
|
||||
if (strcmp(name, "cluster") == 0)
|
||||
strncpy (cluster_name, value, MAXLEN);
|
||||
else if (strcmp(name, "node") == 0)
|
||||
*node = atoi(value);
|
||||
else if (strcmp(name, "conninfo") == 0)
|
||||
strncpy (conninfo, value, MAXLEN);
|
||||
else
|
||||
printf("WARNING: %s/%s: Unknown name/value pair!\n",
|
||||
name, value);
|
||||
}
|
||||
|
||||
/* Close file */
|
||||
fclose (fp);
|
||||
/* Close file */
|
||||
fclose (fp);
|
||||
}
|
||||
|
||||
char *
|
||||
trim (char *s)
|
||||
{
|
||||
/* Initialize start, end pointers */
|
||||
char *s1 = s, *s2 = &s[strlen (s) - 1];
|
||||
/* Initialize start, end pointers */
|
||||
char *s1 = s, *s2 = &s[strlen (s) - 1];
|
||||
|
||||
/* Trim and delimit right side */
|
||||
while ( (isspace (*s2)) && (s2 >= s1) )
|
||||
s2--;
|
||||
*(s2+1) = '\0';
|
||||
/* Trim and delimit right side */
|
||||
while ( (isspace (*s2)) && (s2 >= s1) )
|
||||
s2--;
|
||||
*(s2+1) = '\0';
|
||||
|
||||
/* Trim left side */
|
||||
while ( (isspace (*s1)) && (s1 < s2) )
|
||||
s1++;
|
||||
/* Trim left side */
|
||||
while ( (isspace (*s1)) && (s1 < s2) )
|
||||
s1++;
|
||||
|
||||
/* Copy finished string */
|
||||
strcpy (s, s1);
|
||||
return s;
|
||||
/* Copy finished string */
|
||||
strcpy (s, s1);
|
||||
return s;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -86,7 +88,7 @@ parse_line(char *buff, char *name, char *value)
|
||||
i++;
|
||||
/*
|
||||
* Now the value
|
||||
*/
|
||||
*/
|
||||
j = 0;
|
||||
for ( ; i < MAXLEN; i++)
|
||||
if (buff[i] == '\'')
|
||||
@@ -96,5 +98,5 @@ parse_line(char *buff, char *name, char *value)
|
||||
else
|
||||
break;
|
||||
value[j] = '\0';
|
||||
trim(value);
|
||||
trim(value);
|
||||
}
|
||||
|
||||
3
config.h
3
config.h
@@ -4,6 +4,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
void parse_config(const char *config_file, char *cluster_name, int *node, char *service);
|
||||
void parse_config(const char *config_file, char *cluster_name, int *node,
|
||||
char *service);
|
||||
void parse_line(char *buff, char *name, char *value);
|
||||
char *trim(char *s);
|
||||
|
||||
172
dbutils.c
172
dbutils.c
@@ -12,19 +12,21 @@ PGconn *
|
||||
establishDBConnection(const char *conninfo, const bool exit_on_error)
|
||||
{
|
||||
PGconn *conn;
|
||||
/* Make a connection to the database */
|
||||
conn = PQconnectdb(conninfo);
|
||||
/* Check to see that the backend connection was successfully made */
|
||||
if ((PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
fprintf(stderr, "Connection to database failed: %s",
|
||||
PQerrorMessage(conn));
|
||||
|
||||
/* Make a connection to the database */
|
||||
conn = PQconnectdb(conninfo);
|
||||
|
||||
/* Check to see that the backend connection was successfully made */
|
||||
if ((PQstatus(conn) != CONNECTION_OK))
|
||||
{
|
||||
fprintf(stderr, "Connection to database failed: %s",
|
||||
PQerrorMessage(conn));
|
||||
if (exit_on_error)
|
||||
{
|
||||
PQfinish(conn);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return conn;
|
||||
}
|
||||
@@ -34,29 +36,30 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
|
||||
bool
|
||||
is_standby(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
PGresult *res;
|
||||
bool result;
|
||||
|
||||
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
res = PQexec(conn, "SELECT pg_is_in_recovery()");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (strcmp(PQgetvalue(res, 0, 0), "f") == 0)
|
||||
result = false;
|
||||
else
|
||||
result = true;
|
||||
result = true;
|
||||
|
||||
PQclear(res);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/*
|
||||
 * If the PostgreSQL version is 9 or later, returns the major version;
|
||||
 * if it is 8 or earlier, returns an empty string
|
||||
*/
|
||||
@@ -69,20 +72,26 @@ pg_version(PGconn *conn)
|
||||
int major_version1;
|
||||
char *major_version2;
|
||||
|
||||
res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) "
|
||||
"SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version");
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
res = PQexec(conn,
|
||||
"WITH pg_version(ver) AS "
|
||||
"(SELECT split_part(version(), ' ', 2)) "
|
||||
"SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) "
|
||||
"FROM pg_version");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
major_version1 = atoi(PQgetvalue(res, 0, 0));
|
||||
major_version2 = PQgetvalue(res, 0, 1);
|
||||
PQclear(res);
|
||||
}
|
||||
|
||||
major_version1 = atoi(PQgetvalue(res, 0, 0));
|
||||
major_version2 = PQgetvalue(res, 0, 1);
|
||||
PQclear(res);
|
||||
|
||||
major_version = malloc(10);
|
||||
|
||||
if (major_version1 >= 9)
|
||||
{
|
||||
/* form a major version string */
|
||||
@@ -96,27 +105,28 @@ pg_version(PGconn *conn)
|
||||
|
||||
|
||||
bool
|
||||
guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value)
|
||||
guc_setted(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value)
|
||||
{
|
||||
PGresult *res;
|
||||
char sqlquery[8192];
|
||||
|
||||
sprintf(sqlquery, "SELECT true FROM pg_settings "
|
||||
" WHERE name = '%s' AND setting %s '%s'",
|
||||
parameter, op, value);
|
||||
" WHERE name = '%s' AND setting %s '%s'",
|
||||
parameter, op, value);
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
@@ -131,72 +141,78 @@ get_cluster_size(PGconn *conn)
|
||||
const char *size;
|
||||
char sqlquery[8192];
|
||||
|
||||
sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
|
||||
" FROM pg_database ");
|
||||
sprintf(sqlquery,
|
||||
"SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
|
||||
" FROM pg_database ");
|
||||
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
res = PQexec(conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
PQclear(res);
|
||||
PQfinish(conn);
|
||||
exit(1);
|
||||
}
|
||||
size = PQgetvalue(res, 0, 0);
|
||||
}
|
||||
size = PQgetvalue(res, 0, 0);
|
||||
PQclear(res);
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* get a connection to master by reading repl_nodes, creating a connection
|
||||
* get a connection to master by reading repl_nodes, creating a connection
|
||||
* to each node (one at a time) and finding if it is a master or a standby
|
||||
*/
|
||||
PGconn *
|
||||
getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
|
||||
getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
||||
int *master_id)
|
||||
{
|
||||
PGconn *master_conn = NULL;
|
||||
PGresult *res1;
|
||||
PGresult *res2;
|
||||
char sqlquery[8192];
|
||||
PGresult *res1;
|
||||
PGresult *res2;
|
||||
char sqlquery[8192];
|
||||
char master_conninfo[8192];
|
||||
int i;
|
||||
|
||||
/* find all nodes belonging to this cluster */
|
||||
sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes "
|
||||
" WHERE cluster = '%s' and id <> %d",
|
||||
cluster, cluster, id);
|
||||
" WHERE cluster = '%s' and id <> %d",
|
||||
cluster, cluster, id);
|
||||
|
||||
res1 = PQexec(standby_conn, sqlquery);
|
||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn));
|
||||
PQclear(res1);
|
||||
res1 = PQexec(standby_conn, sqlquery);
|
||||
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get nodes info: %s\n",
|
||||
PQerrorMessage(standby_conn));
|
||||
PQclear(res1);
|
||||
PQfinish(standby_conn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < PQntuples(res1); i++)
|
||||
{
|
||||
{
|
||||
/* initialize with the values of the current node being processed */
|
||||
*master_id = atoi(PQgetvalue(res1, i, 0));
|
||||
strcpy(master_conninfo, PQgetvalue(res1, i, 2));
|
||||
master_conn = establishDBConnection(master_conninfo, false);
|
||||
|
||||
if (PQstatus(master_conn) != CONNECTION_OK)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* I can't use the is_standby() function here because on error that
|
||||
* function closes the connection i pass and exit, but i still need to close
|
||||
* standby_conn
|
||||
/*
|
||||
* I can't use the is_standby() function here because on error that
|
||||
* function closes the connection i pass and exit, but i still need to
|
||||
* close standby_conn
|
||||
*/
|
||||
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
|
||||
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn));
|
||||
PQclear(res2);
|
||||
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
|
||||
|
||||
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "Can't get recovery state from this node: %s\n",
|
||||
PQerrorMessage(master_conn));
|
||||
PQclear(res2);
|
||||
PQfinish(master_conn);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* if false, this is the master */
|
||||
if (strcmp(PQgetvalue(res2, 0, 0), "f") == 0)
|
||||
@@ -212,17 +228,17 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
|
||||
PQfinish(master_conn);
|
||||
*master_id = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we finish this loop without finding a master then
|
||||
* we doesn't have the info or the master has failed (or we
|
||||
* reached max_connections or superuser_reserved_connections,
|
||||
* anything else i'm missing?),
|
||||
* Probably we will need to check the error to know if we need
|
||||
* to start failover procedure or just fix some situation on the
|
||||
* standby.
|
||||
*/
|
||||
* we doesn't have the info or the master has failed (or we
|
||||
* reached max_connections or superuser_reserved_connections,
|
||||
* anything else I'm missing?).
|
||||
*
|
||||
* Probably we will need to check the error to know if we need
|
||||
* to start failover procedure or just fix some situation on the
|
||||
* standby.
|
||||
*/
|
||||
PQclear(res1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
10
dbutils.h
10
dbutils.h
@@ -5,8 +5,10 @@
|
||||
*/
|
||||
|
||||
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
|
||||
bool is_standby(PGconn *conn);
|
||||
bool is_standby(PGconn *conn);
|
||||
char *pg_version(PGconn *conn);
|
||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value);
|
||||
const char *get_cluster_size(PGconn *conn);
|
||||
PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id);
|
||||
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
|
||||
const char *value);
|
||||
const char *get_cluster_size(PGconn *conn);
|
||||
PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster,
|
||||
int *master_id);
|
||||
|
||||
26
repmgr.sql
26
repmgr.sql
@@ -7,25 +7,25 @@ CREATE SCHEMA repmgr;
|
||||
*/
|
||||
drop table if exists repl_nodes cascade;
|
||||
CREATE TABLE repl_nodes (
|
||||
id integer primary key,
|
||||
cluster text not null, -- Name to identify the cluster
|
||||
conninfo text not null
|
||||
id integer primary key,
|
||||
cluster text not null, -- Name to identify the cluster
|
||||
conninfo text not null
|
||||
);
|
||||
ALTER TABLE repl_nodes OWNER TO repmgr;
|
||||
|
||||
/*
|
||||
* Keeps monitor info about every node and their relative "position"
|
||||
* Keeps monitor info about every node and their relative "position"
|
||||
* to primary
|
||||
*/
|
||||
drop table if exists repl_monitor cascade;
|
||||
CREATE TABLE repl_monitor (
|
||||
primary_node INTEGER NOT NULL,
|
||||
standby_node INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_wal_primary_location TEXT NOT NULL,
|
||||
last_wal_standby_location TEXT NOT NULL,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
);
|
||||
ALTER TABLE repl_monitor OWNER TO repmgr;
|
||||
|
||||
@@ -33,10 +33,10 @@ ALTER TABLE repl_monitor OWNER TO repmgr;
|
||||
/*
|
||||
* This view shows the latest monitor info about every node.
|
||||
* Interesting thing to see:
|
||||
* replication_lag: in bytes (this is how far the latest xlog record
|
||||
* replication_lag: in bytes (this is how far the latest xlog record
|
||||
* we have received is from master)
|
||||
* apply_lag: in bytes (this is how far the latest xlog record
|
||||
* we have applied is from the latest record we
|
||||
* we have applied is from the latest record we
|
||||
* have received)
|
||||
 * time_lag: how many seconds we are from being up to date with master
|
||||
*/
|
||||
@@ -45,9 +45,9 @@ CREATE VIEW repl_status AS
|
||||
WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node
|
||||
ORDER BY last_monitor_time desc)
|
||||
FROM repl_monitor)
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
|
||||
last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag,
|
||||
pg_size_pretty(apply_lag) apply_lag,
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
|
||||
last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag,
|
||||
pg_size_pretty(apply_lag) apply_lag,
|
||||
age(now(), last_monitor_time) AS time_lag
|
||||
FROM monitor_info a
|
||||
WHERE row_number = 1;
|
||||
|
||||
295
repmgrd.c
295
repmgrd.c
@@ -4,7 +4,7 @@
|
||||
*
|
||||
* Replication manager daemon
|
||||
* This module connects to the nodes of a replication cluster and monitors
|
||||
* how far are they from master
|
||||
* how far are they from master
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
@@ -17,11 +17,11 @@
|
||||
|
||||
#include "libpq/pqsignal.h"
|
||||
|
||||
char myClusterName[MAXLEN];
|
||||
char myClusterName[MAXLEN];
|
||||
|
||||
/* Local info */
|
||||
int myLocalMode = STANDBY_MODE;
|
||||
int myLocalId = -1;
|
||||
int myLocalMode = STANDBY_MODE;
|
||||
int myLocalId = -1;
|
||||
PGconn *myLocalConn;
|
||||
|
||||
/* Primary info */
|
||||
@@ -49,23 +49,23 @@ static unsigned long long int walLocationToBytes(char *wal_location);
|
||||
static void handle_sigint(SIGNAL_ARGS);
|
||||
static void setup_cancel_handler(void);
|
||||
|
||||
#define CloseConnections() \
|
||||
if (PQisBusy(primaryConn) == 1) \
|
||||
CancelQuery(); \
|
||||
if (myLocalConn != NULL) \
|
||||
PQfinish(myLocalConn); \
|
||||
if (primaryConn != NULL) \
|
||||
PQfinish(primaryConn);
|
||||
#define CloseConnections() \
|
||||
if (PQisBusy(primaryConn) == 1) \
|
||||
CancelQuery(); \
|
||||
if (myLocalConn != NULL) \
|
||||
PQfinish(myLocalConn); \
|
||||
if (primaryConn != NULL) \
|
||||
PQfinish(primaryConn);
|
||||
|
||||
/*
|
||||
* Every 3 seconds, insert monitor info
|
||||
*/
|
||||
#define MonitorCheck() \
|
||||
for (;;) \
|
||||
{ \
|
||||
MonitorExecute(); \
|
||||
sleep(3); \
|
||||
}
|
||||
#define MonitorCheck() \
|
||||
for (;;) \
|
||||
{ \
|
||||
MonitorExecute(); \
|
||||
sleep(3); \
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
@@ -80,7 +80,7 @@ main(int argc, char **argv)
|
||||
int optindex;
|
||||
int c;
|
||||
|
||||
char conninfo[MAXLEN];
|
||||
char conninfo[MAXLEN];
|
||||
const char *standby_version = NULL;
|
||||
|
||||
progname = get_progname(argv[0]);
|
||||
@@ -111,13 +111,14 @@ main(int argc, char **argv)
|
||||
verbose = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
|
||||
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
setup_cancel_handler();
|
||||
|
||||
|
||||
if (config_file == NULL)
|
||||
{
|
||||
config_file = malloc(5 + sizeof(CONFIG_FILE));
|
||||
@@ -126,30 +127,31 @@ main(int argc, char **argv)
|
||||
|
||||
/*
|
||||
* Read the configuration file: repmgr.conf
|
||||
*/
|
||||
*/
|
||||
parse_config(config_file, myClusterName, &myLocalId, conninfo);
|
||||
if (myLocalId == -1)
|
||||
if (myLocalId == -1)
|
||||
{
|
||||
fprintf(stderr, "Node information is missing. "
|
||||
"Check the configuration file.\n");
|
||||
"Check the configuration file.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
myLocalConn = establishDBConnection(conninfo, true);
|
||||
myLocalConn = establishDBConnection(conninfo, true);
|
||||
|
||||
/* should be v9 or better */
|
||||
standby_version = pg_version(myLocalConn);
|
||||
if (strcmp(standby_version, "") == 0)
|
||||
{
|
||||
PQfinish(myLocalConn);
|
||||
fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"), progname);
|
||||
fprintf(stderr, _("%s needs standby to be PostgreSQL 9.0 or better\n"),
|
||||
progname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set my server mode, establish a connection to primary
|
||||
/*
|
||||
* Set my server mode, establish a connection to primary
|
||||
* and start monitor
|
||||
*/
|
||||
*/
|
||||
myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE;
|
||||
if (myLocalMode == PRIMARY_MODE)
|
||||
{
|
||||
@@ -160,7 +162,8 @@ main(int argc, char **argv)
|
||||
else
|
||||
{
|
||||
/* I need the id of the primary as well as a connection to it */
|
||||
primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId);
|
||||
primaryConn = getMasterConnection(myLocalConn, myLocalId,
|
||||
myClusterName, &primaryId);
|
||||
if (primaryConn == NULL)
|
||||
exit(1);
|
||||
}
|
||||
@@ -169,13 +172,13 @@ main(int argc, char **argv)
|
||||
checkNodeConfiguration(conninfo);
|
||||
if (myLocalMode == STANDBY_MODE)
|
||||
{
|
||||
MonitorCheck();
|
||||
MonitorCheck();
|
||||
}
|
||||
|
||||
/* close the connection to the database and cleanup */
|
||||
CloseConnections();
|
||||
/* close the connection to the database and cleanup */
|
||||
CloseConnections();
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -187,7 +190,7 @@ main(int argc, char **argv)
|
||||
static void
|
||||
MonitorExecute(void)
|
||||
{
|
||||
PGresult *res;
|
||||
PGresult *res;
|
||||
char monitor_standby_timestamp[MAXLEN];
|
||||
char last_wal_primary_location[MAXLEN];
|
||||
char last_wal_standby_received[MAXLEN];
|
||||
@@ -199,9 +202,9 @@ MonitorExecute(void)
|
||||
|
||||
int connection_retries;
|
||||
|
||||
/*
|
||||
* Check if the master is still available, if after 5 minutes of retries
|
||||
* we cannot reconnect, try to get a new master.
|
||||
/*
|
||||
* Check if the master is still available, if after 5 minutes of retries
|
||||
* we cannot reconnect, try to get a new master.
|
||||
*/
|
||||
for (connection_retries = 0; connection_retries < 15; connection_retries++)
|
||||
{
|
||||
@@ -212,7 +215,7 @@ MonitorExecute(void)
|
||||
sleep(20);
|
||||
|
||||
PQreset(primaryConn);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "\n%s: Connection to master has been restored, continue monitoring.\n", progname);
|
||||
@@ -221,11 +224,14 @@ MonitorExecute(void)
|
||||
}
|
||||
if (PQstatus(primaryConn) != CONNECTION_OK)
|
||||
{
|
||||
fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ", progname);
|
||||
fprintf(stderr, "\n%s: We couldn't reconnect to master, checking if ",
|
||||
progname);
|
||||
fprintf(stderr, "%s: another node has been promoted.\n", progname);
|
||||
for (connection_retries = 0; connection_retries < 6; connection_retries++)
|
||||
for (connection_retries = 0; connection_retries < 6;
|
||||
connection_retries++)
|
||||
{
|
||||
primaryConn = getMasterConnection(myLocalConn, myLocalId, myClusterName, &primaryId);
|
||||
primaryConn = getMasterConnection(myLocalConn, myLocalId,
|
||||
myClusterName, &primaryId);
|
||||
if (PQstatus(primaryConn) == CONNECTION_OK)
|
||||
{
|
||||
/* Connected, we can continue the process so break the loop */
|
||||
@@ -248,53 +254,53 @@ MonitorExecute(void)
|
||||
|
||||
/* Check if we still are a standby, we could have been promoted */
|
||||
if (!is_standby(myLocalConn))
|
||||
{
|
||||
fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n",
|
||||
{
|
||||
fprintf(stderr, "\n%s: seems like we have been promoted, so exit from monitoring...\n",
|
||||
progname);
|
||||
CloseConnections();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
/*
|
||||
* first check if there is a command being executed,
|
||||
* and if that is the case, cancel the query so i can
|
||||
* insert the current record
|
||||
*/
|
||||
* insert the current record
|
||||
*/
|
||||
if (PQisBusy(primaryConn) == 1)
|
||||
CancelQuery();
|
||||
|
||||
/* Get local xlog info */
|
||||
sprintf(sqlquery,
|
||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||
"pg_last_xlog_replay_location()");
|
||||
sprintf(sqlquery,
|
||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||
"pg_last_xlog_replay_location()");
|
||||
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
/* if there is any error just let it be and retry in next loop */
|
||||
return;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0));
|
||||
strcpy(last_wal_standby_received , PQgetvalue(res, 0, 1));
|
||||
strcpy(last_wal_standby_applied , PQgetvalue(res, 0, 2));
|
||||
PQclear(res);
|
||||
strcpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0));
|
||||
strcpy(last_wal_standby_received , PQgetvalue(res, 0, 1));
|
||||
strcpy(last_wal_standby_applied , PQgetvalue(res, 0, 2));
|
||||
PQclear(res);
|
||||
|
||||
/* Get primary xlog info */
|
||||
sprintf(sqlquery, "SELECT pg_current_xlog_location() ");
|
||||
sprintf(sqlquery, "SELECT pg_current_xlog_location() ");
|
||||
|
||||
res = PQexec(primaryConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
res = PQexec(primaryConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(primaryConn));
|
||||
PQclear(res);
|
||||
return;
|
||||
}
|
||||
|
||||
strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0));
|
||||
PQclear(res);
|
||||
strcpy(last_wal_primary_location, PQgetvalue(res, 0, 0));
|
||||
PQclear(res);
|
||||
|
||||
/* Calculate the lag */
|
||||
lsn_primary = walLocationToBytes(last_wal_primary_location);
|
||||
@@ -305,15 +311,15 @@ MonitorExecute(void)
|
||||
* Build the SQL to execute on primary
|
||||
*/
|
||||
sprintf(sqlquery,
|
||||
"INSERT INTO repmgr_%s.repl_monitor "
|
||||
"VALUES(%d, %d, '%s'::timestamp with time zone, "
|
||||
" '%s', '%s', "
|
||||
" %lld, %lld)", myClusterName,
|
||||
primaryId, myLocalId, monitor_standby_timestamp,
|
||||
last_wal_primary_location,
|
||||
last_wal_standby_received,
|
||||
(lsn_primary - lsn_standby_received),
|
||||
(lsn_standby_received - lsn_standby_applied));
|
||||
"INSERT INTO repmgr_%s.repl_monitor "
|
||||
"VALUES(%d, %d, '%s'::timestamp with time zone, "
|
||||
" '%s', '%s', "
|
||||
" %lld, %lld)", myClusterName,
|
||||
primaryId, myLocalId, monitor_standby_timestamp,
|
||||
last_wal_primary_location,
|
||||
last_wal_standby_received,
|
||||
(lsn_primary - lsn_standby_received),
|
||||
(lsn_standby_received - lsn_standby_applied));
|
||||
|
||||
/*
|
||||
* Execute the query asynchronously, but don't check for a result. We
|
||||
@@ -321,39 +327,41 @@ MonitorExecute(void)
|
||||
*/
|
||||
if (PQsendQuery(primaryConn, sqlquery) == 0)
|
||||
fprintf(stderr, "Query could not be sent to primary. %s\n",
|
||||
PQerrorMessage(primaryConn));
|
||||
PQerrorMessage(primaryConn));
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
checkClusterConfiguration(void)
|
||||
{
|
||||
PGresult *res;
|
||||
PGresult *res;
|
||||
|
||||
sprintf(sqlquery, "SELECT oid FROM pg_class "
|
||||
" WHERE oid = 'repmgr_%s.repl_nodes'::regclass",
|
||||
myClusterName);
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
" WHERE oid = 'repmgr_%s.repl_nodes'::regclass",
|
||||
myClusterName);
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If there isn't any results then we have not configured a primary node yet
|
||||
* in repmgr or the connection string is pointing to the wrong database.
|
||||
* If there isn't any results then we have not configured a primary node
|
||||
* yet in repmgr or the connection string is pointing to the wrong
|
||||
* database.
|
||||
*
|
||||
* XXX if we are the primary, should we try to create the tables needed?
|
||||
*/
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
fprintf(stderr, "The replication cluster is not configured\n");
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
fprintf(stderr, "The replication cluster is not configured\n");
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
exit(1);
|
||||
}
|
||||
PQclear(res);
|
||||
@@ -363,41 +371,40 @@ checkClusterConfiguration(void)
|
||||
static void
|
||||
checkNodeConfiguration(char *conninfo)
|
||||
{
|
||||
PGresult *res;
|
||||
PGresult *res;
|
||||
|
||||
/*
|
||||
* Check if we have my node information in repl_nodes
|
||||
*/
|
||||
/* Check if we have my node information in repl_nodes */
|
||||
sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes "
|
||||
" WHERE id = %d AND cluster = '%s' ",
|
||||
myClusterName, myLocalId, myClusterName);
|
||||
" WHERE id = %d AND cluster = '%s' ",
|
||||
myClusterName, myLocalId, myClusterName);
|
||||
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
res = PQexec(myLocalConn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
fprintf(stderr, "PQexec failed: %s\n", PQerrorMessage(myLocalConn));
|
||||
PQclear(res);
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If there isn't any results then we have not configured this node yet
|
||||
* in repmgr, if that is the case we will insert the node to the cluster
|
||||
* in repmgr, if that is the case we will insert the node to the cluster
|
||||
*/
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
PQclear(res);
|
||||
|
||||
/* Adding the node */
|
||||
sprintf(sqlquery, "INSERT INTO repmgr_%s.repl_nodes "
|
||||
"VALUES (%d, '%s', '%s')",
|
||||
myClusterName, myLocalId, myClusterName, conninfo);
|
||||
"VALUES (%d, '%s', '%s')",
|
||||
myClusterName, myLocalId, myClusterName, conninfo);
|
||||
|
||||
if (!PQexec(primaryConn, sqlquery))
|
||||
if (!PQexec(primaryConn, sqlquery))
|
||||
{
|
||||
fprintf(stderr, "Cannot insert node details, %s\n",
|
||||
PQerrorMessage(primaryConn));
|
||||
PQerrorMessage(primaryConn));
|
||||
PQfinish(myLocalConn);
|
||||
PQfinish(primaryConn);
|
||||
exit(1);
|
||||
@@ -407,33 +414,33 @@ checkNodeConfiguration(char *conninfo)
|
||||
}
|
||||
|
||||
|
||||
/*
 * walLocationToBytes
 *
 * Convert a textual WAL location of the form "XLOGID/XRECOFF" (two
 * hexadecimal numbers separated by a slash) into a single byte position.
 *
 * Returns the computed position, or 0 (after printing a diagnostic to
 * stderr) when the string does not match the expected format.
 *
 * The arithmetic is carried out in unsigned long long: done in unsigned
 * int, the multiplication wraps around as soon as xlogid reaches 2.
 */
static unsigned long long int
walLocationToBytes(char *wal_location)
{
	/*
	 * Number of bytes accounted for by each increment of xlogid.
	 * NOTE(review): presumably 255 segments of 16 MB per log id -- confirm
	 * against the server's WAL layout.
	 */
	const unsigned long long bytes_per_xlogid = 16ULL * 1024 * 1024 * 255;

	unsigned int xlogid;
	unsigned int xrecoff;

	if (sscanf(wal_location, "%X/%X", &xlogid, &xrecoff) != 2)
	{
		fprintf(stderr, "wrong log location format: %s\n", wal_location);
		return 0;
	}

	return ((unsigned long long) xlogid * bytes_per_xlogid) +
		(unsigned long long) xrecoff;
}
|
||||
|
||||
|
||||
/*
 * help
 *
 * Print the usage summary for the daemon to stdout.
 *
 * progname is the program name substituted into the banner, the usage
 * line and the closing description.
 */
static void
help(const char *progname)
{
	printf(_("\n%s: Replicator manager daemon \n"), progname);
	printf(_("Usage:\n"));
	printf(_(" %s [OPTIONS]\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_(" --help show this help, then exit\n"));
	printf(_(" --version output version information, then exit\n"));
	printf(_(" --verbose output verbose activity information\n"));
	/* -f takes the path of a configuration file, not a database name */
	printf(_(" -f, --config_file=PATH path to the configuration file\n"));
	printf(_("\n%s monitors a cluster of servers.\n"), progname);
}
|
||||
|
||||
|
||||
@@ -442,13 +449,13 @@ help(const char *progname)
|
||||
/*
 * handle_sigint
 *
 * Signal handler: invokes CloseConnections() when the signal is delivered.
 *
 * NOTE(review): CloseConnections() is defined elsewhere; confirm that it is
 * safe to call from signal-handler context.
 */
static void
handle_sigint(SIGNAL_ARGS)
{
	CloseConnections();
}
|
||||
|
||||
static void
|
||||
setup_cancel_handler(void)
|
||||
{
|
||||
pqsignal(SIGINT, handle_sigint);
|
||||
pqsignal(SIGINT, handle_sigint);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -456,13 +463,13 @@ setup_cancel_handler(void)
|
||||
static void
|
||||
CancelQuery(void)
|
||||
{
|
||||
char errbuf[256];
|
||||
PGcancel *pgcancel;
|
||||
char errbuf[256];
|
||||
PGcancel *pgcancel;
|
||||
|
||||
pgcancel = PQgetCancel(primaryConn);
|
||||
pgcancel = PQgetCancel(primaryConn);
|
||||
|
||||
if (!pgcancel || PQcancel(pgcancel, errbuf, 256) == 0)
|
||||
fprintf(stderr, "Can't stop current query: %s", errbuf);
|
||||
if (!pgcancel || PQcancel(pgcancel, errbuf, 256) == 0)
|
||||
fprintf(stderr, "Can't stop current query: %s", errbuf);
|
||||
|
||||
PQfreeCancel(pgcancel);
|
||||
PQfreeCancel(pgcancel);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user