Merge commit '3ef1fa126d9c9b9ba3b29deab7f67218cdf7ce10'

Conflicts:
	.gitignore
	Makefile
	README.rst
	check_dir.c
	config.c
	config.h
	dbutils.h
	repmgr.c
	repmgr.conf
	repmgr.h
	repmgrd.c
This commit is contained in:
Greg Smith
2011-02-15 00:06:01 -05:00
15 changed files with 541 additions and 233 deletions

View File

@@ -9,3 +9,4 @@ Bas van Oostveen <v.oostveen@gmail.com>
Hannu Krosing <hannu@2ndQuadrant.com>
Cédric Villemain <cedric@2ndquadrant.com>
Charles Duffy <charles@dyfis.net>
Daniel Farina <daniel@heroku.com>

View File

@@ -2,8 +2,10 @@
# Makefile
# Copyright (c) 2ndQuadrant, 2010-2011
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
DATA = repmgr.sql uninstall_repmgr.sql
PG_CPPFLAGS = -I$(libpq_srcdir)
PG_LIBS = $(libpq_pgport)
@@ -26,10 +28,19 @@ include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
# XXX: Try to use the PROGRAM construct (see pgxs.mk) someday. Right now
# this target overrides the pgxs install.
install:
$(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)'
$(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)'
ifneq (,$(DATA)$(DATA_built))
@for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \
echo "$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'"; \
$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'; \
done
endif
clean:
rm -f *.o
rm -f repmgrd

View File

@@ -555,6 +555,7 @@ following
* ERR_DB_CON 6: Error when trying to connect to a database.
* ERR_DB_QUERY 7: Error executing a database query.
* ERR_PROMOTED 8: Exiting program because the node has been promoted to master.
* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected.
Detailed walkthrough
====================

View File

@@ -1,6 +1,6 @@
/*
* check_dir.c - Directories management functions
* Copyright (C) 2ndQuadrant, 2011
* Copyright (C) 2ndQuadrant, 2010-2011
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,9 +24,12 @@
#include <stdio.h>
#include <string.h>
/* NB: postgres_fe must be included BEFORE check_dir */
#include "postgres_fe.h"
#include "check_dir.h"
#include "strutil.h"
static int mkdir_p(char *path, mode_t omode);
@@ -64,7 +67,7 @@ check_dir(char *dir)
}
else
{
result = 2; /* not empty */
result = 2; /* not empty */
break;
}
}
@@ -111,7 +114,7 @@ set_directory_permissions(char *dir)
/* function from initdb.c */
/* source stolen from FreeBSD /src/bin/mkdir/mkdir.c and adapted */
/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */
/*
* this tries to build all the elements of a path to a directory a la mkdir -p
@@ -219,10 +222,11 @@ mkdir_p(char *path, mode_t omode)
bool
is_pg_dir(char *dir)
{
char path[8192];
struct stat sb;
const size_t buf_sz = 8192;
char path[buf_sz];
struct stat sb;
sprintf(path, "%s/PG_VERSION", dir);
xsnprintf(path, buf_sz, "%s/PG_VERSION", dir);
return (stat(path, &sb) == 0) ? true : false;
}

View File

@@ -18,6 +18,8 @@
*/
#include "config.h"
#include "repmgr.h"
#include "strutil.h"
void
parse_config(const char* config_file, t_configuration_options* options)

View File

@@ -21,6 +21,7 @@
#define _REPMGR_CONFIG_H_
#include "repmgr.h"
#include "strutil.h"
typedef struct
{

110
dbutils.c
View File

@@ -19,8 +19,7 @@
#include "repmgr.h"
#define MAXQUERY 8192
#define MAXCONNINFO 1024
#include "strutil.h"
PGconn *
establishDBConnection(const char *conninfo, const bool exit_on_error)
@@ -33,6 +32,7 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
{
fprintf(stderr, "Connection to database failed: %s",
PQerrorMessage(conn));
if (exit_on_error)
{
PQfinish(conn);
@@ -44,7 +44,6 @@ establishDBConnection(const char *conninfo, const bool exit_on_error)
}
bool
is_standby(PGconn *conn)
{
@@ -52,6 +51,7 @@ is_standby(PGconn *conn)
bool result;
res = PQexec(conn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn));
@@ -79,11 +79,15 @@ pg_version(PGconn *conn, char* major_version)
{
PGresult *res;
int major_version1;
char *major_version2;
int major_version1;
char *major_version2;
res = PQexec(conn,
"WITH pg_version(ver) AS "
"(SELECT split_part(version(), ' ', 2)) "
"SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) "
"FROM pg_version");
res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) "
"SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version");
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn));
@@ -91,31 +95,35 @@ pg_version(PGconn *conn, char* major_version)
PQfinish(conn);
exit(ERR_DB_QUERY);
}
major_version1 = atoi(PQgetvalue(res, 0, 0));
major_version2 = PQgetvalue(res, 0, 1);
PQclear(res);
if (major_version1 >= 9)
{
/* form a major version string */
snprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, major_version2);
xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1,
major_version2);
}
else
strcpy(major_version, "");
PQclear(res);
return major_version;
}
bool
guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value)
guc_setted(PGconn *conn, const char *parameter, const char *op,
const char *value)
{
PGresult *res;
char sqlquery[MAXQUERY];
char sqlquery[QUERY_STR_LEN];
sprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting %s '%s'",
parameter, op, value);
sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings "
" WHERE name = '%s' AND setting %s '%s'",
parameter, op, value);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -140,11 +148,13 @@ const char *
get_cluster_size(PGconn *conn)
{
PGresult *res;
const char *size;
char sqlquery[MAXQUERY];
const char *size;
char sqlquery[QUERY_STR_LEN];
sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
" FROM pg_database ");
sqlquery_snprintf(
sqlquery,
"SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) "
" FROM pg_database ");
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -162,26 +172,57 @@ get_cluster_size(PGconn *conn)
/*
* get a connection to master by reading repl_nodes, creating a connection
* to each node (one at a time) and finding if it is a master or a standby
*
* NB: master_conninfo_out may be NULL. If it is non-NULL, it is assumed to
* point to allocated memory at least MAXCONNINFO bytes in length, and the
* master server connection string is placed there.
*/
PGconn *
getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
getMasterConnection(PGconn *standby_conn, int id, char *cluster,
int *master_id, char *master_conninfo_out)
{
PGconn *master_conn = NULL;
PGresult *res1;
PGresult *res2;
char sqlquery[MAXQUERY];
char master_conninfo[MAXCONNINFO];
PGconn *master_conn = NULL;
PGresult *res1;
PGresult *res2;
char sqlquery[QUERY_STR_LEN];
char master_conninfo_stack[MAXCONNINFO];
char *master_conninfo = &*master_conninfo_stack;
char schema_str[MAXLEN];
char schema_quoted[MAXLEN];
int i;
/*
* If the caller wanted to get a copy of the connection info string, sub
* out the local stack pointer for the pointer passed by the caller.
*/
if (master_conninfo_out != NULL)
master_conninfo = master_conninfo_out;
/*
* XXX: This is copied in at least two other procedures
*
* Assemble the unquoted schema name
*/
maxlen_snprintf(schema_str, "repmgr_%s", cluster);
{
char *identifier = PQescapeIdentifier(standby_conn, schema_str,
strlen(schema_str));
maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier);
}
/* find all nodes belonging to this cluster */
sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes "
" WHERE cluster = '%s' and id <> %d",
cluster, cluster, id);
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
" WHERE cluster = '%s' and id <> %d",
schema_quoted, cluster, id);
res1 = PQexec(standby_conn, sqlquery);
if (PQresultStatus(res1) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn));
fprintf(stderr, "Can't get nodes info: %s\n",
PQerrorMessage(standby_conn));
PQclear(res1);
PQfinish(standby_conn);
exit(ERR_DB_QUERY);
@@ -193,18 +234,21 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
*master_id = atoi(PQgetvalue(res1, i, 0));
strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO);
master_conn = establishDBConnection(master_conninfo, false);
if (PQstatus(master_conn) != CONNECTION_OK)
continue;
/*
* I can't use the is_standby() function here because on error that
* function closes the connection i pass and exit, but i still need to close
* standby_conn
* function closes the connection I pass and exits, but I still need to
* close standby_conn
*/
res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()");
if (PQresultStatus(res2) != PGRES_TUPLES_OK)
{
fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn));
fprintf(stderr, "Can't get recovery state from this node: %s\n",
PQerrorMessage(master_conn));
PQclear(res2);
PQfinish(master_conn);
continue;
@@ -229,7 +273,8 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
/* If we finish this loop without finding a master then
* we don't have the info or the master has failed (or we
* reached max_connections or superuser_reserved_connections,
* anything else i'm missing?),
* anything else I'm missing?).
*
* Probably we will need to check the error to know if we need
* to start failover procedure or just fix some situation on the
* standby.
@@ -237,4 +282,3 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id)
PQclear(res1);
return NULL;
}

View File

@@ -21,10 +21,12 @@
#define _REPMGR_DBUTILS_H_
PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error);
bool is_standby(PGconn *conn);
bool is_standby(PGconn *conn);
char *pg_version(PGconn *conn, char* major_version);
bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value);
const char *get_cluster_size(PGconn *conn);
PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id);
bool guc_setted(PGconn *conn, const char *parameter, const char *op,
const char *value);
const char *get_cluster_size(PGconn *conn);
PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster,
int *master_id, char *master_conninfo_out);
#endif

401
repmgr.c
View File

@@ -34,20 +34,22 @@
#include "log.h"
#include "config.h"
#include "check_dir.h"
#include "strutil.h"
#define RECOVERY_FILE "recovery.conf"
#define RECOVERY_DONE_FILE "recovery.done"
#define NO_ACTION 0 /* Not a real action, just to initialize */
#define MASTER_REGISTER 1
#define NO_ACTION 0 /* Not a real action, just to initialize */
#define MASTER_REGISTER 1
#define STANDBY_REGISTER 2
#define STANDBY_CLONE 3
#define STANDBY_PROMOTE 4
#define STANDBY_FOLLOW 5
static void help(const char *progname);
static bool create_recovery_file(const char *data_dir);
static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory);
static bool create_recovery_file(const char *data_dir, char *master_conninfo);
static int copy_remote_files(char *host, char *remote_user, char *remote_path,
char *local_path, bool is_directory);
static bool check_parameters_for_action(const int action);
static void do_master_register(void);
@@ -111,7 +113,8 @@ main(int argc, char **argv)
}
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1)
while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options,
&optindex)) != -1)
{
switch (c)
{
@@ -164,7 +167,8 @@ main(int argc, char **argv)
if (optind < argc)
{
server_mode = argv[optind++];
if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0)
if (strcasecmp(server_mode, "STANDBY") != 0 &&
strcasecmp(server_mode, "MASTER") != 0)
{
usage();
exit(ERR_BAD_CONFIG);
@@ -240,36 +244,18 @@ main(int argc, char **argv)
}
/*
* Read the configuration file: repmgr.conf
* Read the configuration file: repmgr.conf, but only if we're not doing a
* STANDBY CLONE action: it is not necessary to have the configuration file
* in that case.
*/
parse_config(runtime_options.config_file, &options);
if (options.node == -1)
if (action != STANDBY_CLONE)
{
fprintf(stderr, "Node information is missing. "
"Check the configuration file.\n");
exit(ERR_BAD_CONFIG);
if (runtime_options.verbose)
printf(_("Opening configuration file: %s\n"), runtime_options.config_file);
parse_config(runtime_options.config_file, &options);
}
keywords[2] = "user";
values[2] = runtime_options.username;
keywords[3] = "dbname";
values[3] = runtime_options.dbname;
keywords[4] = "application_name";
values[4] = (char *) progname;
keywords[5] = NULL;
values[5] = NULL;
if (!runtime_options.config_file[0])
strncpy(runtime_options.config_file, DEFAULT_CONFIG_FILE, MAXLEN);
/*
* Read the configuration file: repmgr.conf
*/
if (runtime_options.verbose)
printf(_("Opening configuration file: %s\n"), runtime_options.config_file);
parse_config(runtime_options.config_file, &options);
if (need_a_node)
{
@@ -317,11 +303,12 @@ main(int argc, char **argv)
static void
do_master_register(void)
{
PGconn *conn;
PGconn *conn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
bool schema_exists = false;
char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR];
conn = establishDBConnection(options.conninfo, true);
@@ -343,8 +330,22 @@ do_master_register(void)
exit(ERR_BAD_CONFIG);
}
/* Assemble a quoted schema name
* XXX This is not currently used due to a merge conflict, but
* probably should be */
if (false)
{
char *identifier = PQescapeIdentifier(conn, repmgr_schema,
strlen(repmgr_schema));
maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier);
}
/* Check if there is a schema for this cluster */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema);
sqlquery_snprintf(sqlquery,
"SELECT 1 FROM pg_namespace "
"WHERE nspname = '%s'", repmgr_schema);
log_debug("master register: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -373,7 +374,7 @@ do_master_register(void)
log_info("master register: creating database objects inside the %s schema", repmgr_schema);
/* ok, create the schema */
snprintf(sqlquery, QUERY_STR_LEN, "CREATE SCHEMA %s", repmgr_schema);
sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema);
log_debug("master register: %s\n", sqlquery);
if (!PQexec(conn, sqlquery))
{
@@ -384,7 +385,7 @@ do_master_register(void)
}
/* ... the tables */
snprintf(sqlquery, QUERY_STR_LEN, "CREATE TABLE %s.repl_nodes ( "
sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( "
" id integer primary key, "
" cluster text not null, "
" conninfo text not null)", repmgr_schema);
@@ -397,7 +398,7 @@ do_master_register(void)
exit(ERR_BAD_CONFIG);
}
snprintf(sqlquery, QUERY_STR_LEN, "CREATE TABLE %s.repl_monitor ( "
sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( "
" primary_node INTEGER NOT NULL, "
" standby_node INTEGER NOT NULL, "
" last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, "
@@ -415,7 +416,7 @@ do_master_register(void)
}
/* and the view */
snprintf(sqlquery, QUERY_STR_LEN, "CREATE VIEW %s.repl_status AS "
sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS "
" WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node "
" ORDER BY last_monitor_time desc) "
" FROM %s.repl_monitor) "
@@ -436,10 +437,11 @@ do_master_register(void)
else
{
PGconn *master_conn;
int id;
int id;
/* Ensure there isn't any other master already registered */
master_conn = getMasterConnection(conn, options.node, options.cluster_name, &id);
master_conn = getMasterConnection(conn, options.node,
options.cluster_name, &id,NULL);
if (master_conn != NULL)
{
PQfinish(master_conn);
@@ -451,7 +453,7 @@ do_master_register(void)
/* Now register the master */
if (runtime_options.force)
{
snprintf(sqlquery, QUERY_STR_LEN, "DELETE FROM %s.repl_nodes "
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug("master register: %s\n", sqlquery);
@@ -465,7 +467,7 @@ do_master_register(void)
}
}
snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes "
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
"VALUES (%d, '%s', '%s')",
repmgr_schema, options.node, options.cluster_name, options.conninfo);
log_debug("master register: %s\n", sqlquery);
@@ -488,18 +490,21 @@ do_master_register(void)
static void
do_standby_register(void)
{
PGconn *conn;
PGconn *conn;
PGconn *master_conn;
int master_id;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char schema_quoted[MAXLEN];
char master_version[MAXVERSIONSTR];
char standby_version[MAXVERSIONSTR];
conn = establishDBConnection(options.conninfo, true);
/* XXX: A lot of copied code from do_master_register! Refactor */
/* should be v9 or better */
pg_version(conn, standby_version);
if (strcmp(standby_version, "") == 0)
@@ -517,8 +522,20 @@ do_standby_register(void)
exit(ERR_BAD_CONFIG);
}
/* Assemble a quoted schema name
* XXX This is not currently used due to a merge conflict, but
* probably should be */
if (false)
{
char *identifier = PQescapeIdentifier(conn, repmgr_schema,
strlen(repmgr_schema));
maxlen_snprintf(schema_quoted, "%s", identifier);
PQfreemem(identifier);
}
/* Check if there is a schema for this cluster */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema);
sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema);
log_debug("standby register: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -529,8 +546,9 @@ do_standby_register(void)
exit(ERR_BAD_CONFIG);
}
if (PQntuples(res) == 0) /* schema doesn't exist */
if (PQntuples(res) == 0)
{
/* schema doesn't exist */
log_err("Schema %s doesn't exists.\n", repmgr_schema);
PQclear(res);
PQfinish(conn);
@@ -539,7 +557,8 @@ do_standby_register(void)
PQclear(res);
/* check if there is a master in this cluster */
master_conn = getMasterConnection(conn, options.node, options.cluster_name, &master_id);
master_conn = getMasterConnection(conn, options.node, options.cluster_name,
&master_id, NULL);
if (!master_conn)
{
log_err(_("A master must be defined before configuring a slave\n"));
@@ -570,7 +589,7 @@ do_standby_register(void)
/* Now register the standby */
if (runtime_options.force)
{
snprintf(sqlquery, QUERY_STR_LEN, "DELETE FROM %s.repl_nodes "
sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes "
" WHERE id = %d",
repmgr_schema, options.node);
log_debug("standby register: %s\n", sqlquery);
@@ -585,7 +604,7 @@ do_standby_register(void)
}
}
snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes "
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
"VALUES (%d, '%s', '%s')",
repmgr_schema, options.node, options.cluster_name, options.conninfo);
log_debug("standby register: %s\n", sqlquery);
@@ -608,9 +627,9 @@ do_standby_register(void)
static void
do_standby_clone(void)
{
PGconn *conn;
PGconn *conn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char sqlquery[QUERY_STR_LEN];
int r = 0;
int i;
@@ -624,8 +643,8 @@ do_standby_clone(void)
char local_control_file[MAXFILENAME];
char tblspc_dir[MAXFILENAME];
const char *first_wal_segment = NULL;
const char *last_wal_segment = NULL;
char *first_wal_segment = NULL;
const char *last_wal_segment = NULL;
char master_version[MAXVERSIONSTR];
@@ -745,8 +764,14 @@ do_standby_clone(void)
log_info(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn));
/* Check if the tablespace locations exists and that we can write to them */
snprintf(sqlquery, QUERY_STR_LEN, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')");
/*
* Check if the tablespace locations exist and that we can write to
* them.
*/
sqlquery_snprintf(sqlquery,
"SELECT spclocation "
" FROM pg_tablespace "
"WHERE spcname NOT IN ('pg_default', 'pg_global')");
log_debug("standby clone: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
@@ -798,9 +823,10 @@ do_standby_clone(void)
/* Present and not empty */
if (!runtime_options.force)
{
fprintf(stderr,
_("%s: directory \"%s\" exists but is not empty\n"),
progname, tblspc_dir);
fprintf(
stderr,
_("%s: directory \"%s\" exists but is not empty\n"),
progname, tblspc_dir);
PQclear(res);
PQfinish(conn);
exit(ERR_BAD_CONFIG);
@@ -818,9 +844,10 @@ do_standby_clone(void)
log_notice("Starting backup...\n");
/* Get the data directory full path and the configuration files location */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT name, setting "
" FROM pg_settings "
" WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')");
sqlquery_snprintf(sqlquery,
"SELECT name, setting "
" FROM pg_settings "
" WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')");
log_debug("standby clone: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -849,9 +876,11 @@ do_standby_clone(void)
* inform the master we will start a backup and get the first XLog filename
* so we can say to the user we need those files
*/
snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL));
sqlquery_snprintf(
sqlquery,
"SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))",
time(NULL));
log_debug("standby clone: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
{
@@ -860,25 +889,36 @@ do_standby_clone(void)
PQfinish(conn);
exit(ERR_BAD_CONFIG);
}
first_wal_segment = PQgetvalue(res, 0, 0);
if (runtime_options.verbose)
{
char *first_wal_seg_pq = PQgetvalue(res, 0, 0);
size_t buf_sz = strlen(first_wal_seg_pq);
first_wal_segment = malloc(buf_sz + 1);
xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq);
}
PQclear(res);
/*
* 1) first move global/pg_control
*
* 2) then move data_directory ommiting the files we have already moved and pg_xlog
* content
* 2) then move data_directory, omitting the files we have already moved and
* pg_xlog content
*
* 3) finally We need to backup configuration files (that could be on other directories, debian
* like systems likes to do that), so look at config_file, hba_file and ident_file but we
* can omit external_pid_file ;)
* 3) finally we need to back up configuration files (that could be in other
* directories; Debian-like systems like to do that), so look at
* config_file, hba_file and ident_file, but we can omit
* external_pid_file ;)
*
* On error we need to return but before that execute pg_stop_backup()
*/
/* need to create the global sub directory */
snprintf(master_control_file, MAXFILENAME, "%s/global/pg_control", master_data_directory);
snprintf(local_control_file, MAXFILENAME, "%s/global", runtime_options.dest_dir);
maxlen_snprintf(master_control_file, "%s/global/pg_control",
master_data_directory);
maxlen_snprintf(local_control_file, "%s/global", runtime_options.dest_dir);
if (!create_directory(local_control_file))
{
log_err(_("%s: couldn't create directory %s ...\n"),
@@ -887,7 +927,8 @@ do_standby_clone(void)
}
log_info("standby clone: master control file '%s'\n", master_control_file);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_control_file, local_control_file, false);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_control_file, local_control_file, false);
if (r != 0)
{
log_warning("standby clone: failed copying master control file '%s'\n", master_control_file);
@@ -895,7 +936,8 @@ do_standby_clone(void)
}
log_info("standby clone: master data directory '%s'\n", master_data_directory);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_data_directory, runtime_options.dest_dir, true);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_data_directory, runtime_options.dest_dir, true);
if (r != 0)
{
log_warning("standby clone: failed copying master data directory '%s'\n", master_data_directory);
@@ -903,10 +945,14 @@ do_standby_clone(void)
}
/*
* Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate
* rsync option but besides we could someday make all these rsync happen concurrently
* Copy tablespace locations. I'm doing this separately because I couldn't
* find an appropriate rsync option; besides, we could someday make all
* these rsyncs happen concurrently.
*/
snprintf(sqlquery, QUERY_STR_LEN, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')");
sqlquery_snprintf(sqlquery,
"SELECT spclocation "
" FROM pg_tablespace "
" WHERE spcname NOT IN ('pg_default', 'pg_global')");
log_debug("standby clone: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -919,7 +965,8 @@ do_standby_clone(void)
{
strncpy(tblspc_dir, PQgetvalue(res, i, 0), MAXFILENAME);
log_info("standby clone: master tablespace '%s'\n", tblspc_dir);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user, tblspc_dir, tblspc_dir, true);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
tblspc_dir, tblspc_dir, true);
if (r != 0)
{
log_warning("standby clone: failed copying tablespace directory '%s'\n", tblspc_dir);
@@ -928,7 +975,8 @@ do_standby_clone(void)
}
log_info("standby clone: master config file '%s'\n", master_config_file);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_config_file, runtime_options.dest_dir, false);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_config_file, runtime_options.dest_dir, false);
if (r != 0)
{
log_warning("standby clone: failed copying master config file '%s'\n", master_config_file);
@@ -944,7 +992,8 @@ do_standby_clone(void)
}
log_info("standby clone: master ident file '%s'\n", master_ident_file);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_ident_file, runtime_options.dest_dir, false);
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_ident_file, runtime_options.dest_dir, false);
if (r != 0)
{
log_warning("standby clone: failed copying master ident file '%s'\n", master_ident_file);
@@ -963,7 +1012,7 @@ stop_backup:
log_notice("Finishing backup...\n");
snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_xlogfile_name(pg_stop_backup())");
sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())");
log_debug("standby clone: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
@@ -976,16 +1025,35 @@ stop_backup:
}
last_wal_segment = PQgetvalue(res, 0, 0);
if (runtime_options.verbose)
{
printf(
_("%s requires primary to keep WAL files %s until at least %s\n"),
progname, first_wal_segment, last_wal_segment);
/*
* Only free the first_wal_segment since it was copied out of the
* pqresult.
*/
free(first_wal_segment);
first_wal_segment = NULL;
}
PQclear(res);
PQfinish(conn);
/* If the rsync failed then exit */
if (r != 0)
exit(ERR_BAD_RSYNC);
if (runtime_options.verbose)
printf(_("%s requires primary to keep WAL files %s until at least %s\n"),
progname, first_wal_segment, last_wal_segment);
log_info(_("%s requires primary to keep WAL files %s until at least %s\n"),
progname, first_wal_segment, last_wal_segment);
/* we need to create the pg_xlog sub directory too, i'm reusing a variable here */
snprintf(local_control_file, MAXFILENAME, "%s/pg_xlog", runtime_options.dest_dir);
/*
* We need to create the pg_xlog sub directory too, I'm reusing a variable
* here.
*/
maxlen_snprintf(local_control_file, "%s/pg_xlog", runtime_options.dest_dir);
if (!create_directory(local_control_file))
{
log_err(_("%s: couldn't create directory %s, you will need to do it manually...\n"),
@@ -994,7 +1062,7 @@ stop_backup:
}
/* Finally, write the recovery.conf file */
create_recovery_file(runtime_options.dest_dir);
create_recovery_file(runtime_options.dest_dir, NULL);
PQclear(res);
PQfinish(conn);
@@ -1011,10 +1079,10 @@ stop_backup:
static void
do_standby_promote(void)
{
PGconn *conn;
PGconn *conn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char script[QUERY_STR_LEN];
char script[MAXLEN];
PGconn *old_master_conn;
int old_master_id;
@@ -1046,7 +1114,8 @@ do_standby_promote(void)
}
/* we also need to check if there isn't any master already */
old_master_conn = getMasterConnection(conn, options.node, options.cluster_name, &old_master_id);
old_master_conn = getMasterConnection(conn, options.node, options.cluster_name,
&old_master_id, NULL);
if (old_master_conn != NULL)
{
PQfinish(old_master_conn);
@@ -1058,7 +1127,7 @@ do_standby_promote(void)
printf(_("\n%s: Promoting standby...\n"), progname);
/* Get the data directory full path and the last subdirectory */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT setting "
sqlquery_snprintf(sqlquery, "SELECT setting "
" FROM pg_settings WHERE name = 'data_directory'");
log_debug("standby promote: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
@@ -1073,8 +1142,8 @@ do_standby_promote(void)
PQclear(res);
PQfinish(conn);
snprintf(recovery_file_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_FILE);
snprintf(recovery_done_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_DONE_FILE);
maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE);
rename(recovery_file_path, recovery_done_path);
/*
@@ -1083,7 +1152,7 @@ do_standby_promote(void)
* find an active server rather than one starting up. This may
* hang for up the default timeout (60 seconds).
*/
snprintf(script, QUERY_STR_LEN, "pg_ctl -D %s -w -m fast restart", data_dir);
maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir);
r = system(script);
if (r != 0)
{
@@ -1102,7 +1171,6 @@ do_standby_promote(void)
log_err("\n%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n", progname);
}
PQfinish(conn);
return;
}
@@ -1110,11 +1178,11 @@ do_standby_promote(void)
static void
do_standby_follow(void)
{
PGconn *conn;
PGconn *conn;
PGresult *res;
char sqlquery[QUERY_STR_LEN];
char script[QUERY_STR_LEN];
char script[MAXLEN];
char master_conninfo[MAXLEN];
PGconn *master_conn;
int master_id;
@@ -1145,7 +1213,8 @@ do_standby_follow(void)
}
/* we also need to check if there is any master in the cluster */
master_conn = getMasterConnection(conn, options.node, options.cluster_name, &master_id);
master_conn = getMasterConnection(conn, options.node,
options.cluster_name, &master_id,(char *) &master_conninfo);
if (master_conn == NULL)
{
PQfinish(conn);
@@ -1190,11 +1259,10 @@ do_standby_follow(void)
strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN);
PQfinish(master_conn);
if (runtime_options.verbose)
printf(_("\n%s: Changing standby's master...\n"), progname);
log_info(_("%s Changing standby's master"),progname);
/* Get the data directory full path */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT setting "
sqlquery_snprintf(sqlquery, "SELECT setting "
" FROM pg_settings WHERE name = 'data_directory'");
log_debug("standby follow: %s\n", sqlquery);
res = PQexec(conn, sqlquery);
@@ -1210,12 +1278,12 @@ do_standby_follow(void)
PQfinish(conn);
/* write the recovery.conf file */
if (!create_recovery_file(data_dir))
if (!create_recovery_file(data_dir,NULL))
exit(ERR_BAD_CONFIG);
/* Finally, restart the service */
/* We assume the pg_ctl script is in the PATH */
snprintf(script, QUERY_STR_LEN, "pg_ctl -D %s -m fast restart", data_dir);
maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir);
r = system(script);
if (r != 0)
{
@@ -1238,44 +1306,50 @@ void help(const char *progname)
{
printf(_("\n%s: Replicator manager \n"), progname);
printf(_("Usage:\n"));
printf(_(" %s [OPTIONS] master {register}\n"), progname);
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname);
printf(_(" %s [OPTIONS] master {register}\n"), progname);
printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"),
progname);
printf(_("\nGeneral options:\n"));
printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n"));
printf(_(" --verbose output verbose activity information\n"));
printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n"));
printf(_(" --verbose output verbose activity information\n"));
printf(_("\nConnection options:\n"));
printf(_(" -d, --dbname=DBNAME database to connect to\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n"));
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
printf(_(" -d, --dbname=DBNAME database to connect to\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port\n"));
printf(_(" -U, --username=USERNAME database user name to connect as\n"));
printf(_("\nConfiguration options:\n"));
printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n"));
printf(_(" -f, --config_file=PATH path to the configuration file\n"));
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n"));
printf(_(" -f, --config_file=PATH path to the configuration file\n"));
printf(_(" -R, --remote-user=USERNAME database server username for rsync\n"));
printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n"));
printf(_(" -F, --force force potentially dangerous operations to happen\n"));
printf(_("\n%s performs some tasks like clone a node, promote it "), progname);
printf(_("or making follow another node and then exits.\n"));
printf(_("COMMANDS:\n"));
printf(_(" master register - registers the master in a cluster\n"));
printf(_(" standby register - registers a standby in a cluster\n"));
printf(_(" standby clone [node] - allows creation of a new standby\n"));
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
printf(_(" master register - registers the master in a cluster\n"));
printf(_(" standby register - registers a standby in a cluster\n"));
printf(_(" standby clone [node] - allows creation of a new standby\n"));
printf(_(" standby promote - allows manual promotion of a specific standby into a "));
printf(_("new master in the event of a failover\n"));
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
printf(_(" standby follow - allows the standby to re-point itself to a new master\n"));
}
/*
* Creates a recovery file for a standby.
*
* Writes master_conninfo to recovery.conf if it is non-NULL
*/
static bool
create_recovery_file(const char *data_dir)
create_recovery_file(const char *data_dir, char *master_conninfo)
{
FILE *recovery_file;
char recovery_file_path[MAXFILENAME];
char recovery_file_path[MAXLEN];
char line[MAXLEN];
snprintf(recovery_file_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_FILE);
maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE);
recovery_file = fopen(recovery_file_path, "w");
if (recovery_file == NULL)
@@ -1284,7 +1358,7 @@ create_recovery_file(const char *data_dir)
return false;
}
snprintf(line, MAXLEN, "standby_mode = 'on'\n");
maxlen_snprintf(line, "standby_mode = 'on'\n");
if (fputs(line, recovery_file) == EOF)
{
log_err("recovery file could not be written, it could be necessary to create it manually\n");
@@ -1292,7 +1366,34 @@ create_recovery_file(const char *data_dir)
return false;
}
snprintf(line, MAXLEN, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host, runtime_options.masterport);
maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host, runtime_options.masterport);
/*
* Template a password into the connection string in recovery.conf.
* Sometimes this is passed by the user explicitly, and otherwise we try to
* get it into the environment
*
* XXX: This is pretty dirty, at least push this up to the caller rather
* than hitting environment variables at this level.
*/
if (master_conninfo == NULL)
{
char *password = getenv("PGPASSWORD");
if (password == NULL)
{
fprintf(stderr,
_("%s: Panic! PGPASSWORD not set, how can we get here?\n"),
progname);
exit(ERR_BAD_PASSWORD);
}
maxlen_snprintf(line,
"primary_conninfo = 'host=%s port=%s password=%s'\n",
runtime_options.host, runtime_options.masterport,
password);
}
if (fputs(line, recovery_file) == EOF)
{
log_err("recovery file could not be written, it could be necessary to create it manually\n");
@@ -1308,39 +1409,42 @@ create_recovery_file(const char *data_dir)
static int
copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory)
copy_remote_files(char *host, char *remote_user, char *remote_path,
char *local_path, bool is_directory)
{
char script[QUERY_STR_LEN];
char rsync_flags[QUERY_STR_LEN];
char host_string[QUERY_STR_LEN];
int r;
char script[MAXLEN];
char rsync_flags[MAXLEN];
char host_string[MAXLEN];
int r;
if (strnlen(options.rsync_options, QUERY_STR_LEN) == 0)
snprintf(rsync_flags, QUERY_STR_LEN, "--archive --checksum --compress --progress --rsh=ssh");
if (strnlen(options.rsync_options, MAXLEN) == 0)
maxlen_snprintf(
rsync_flags, "%s",
"--archive --checksum --compress --progress --rsh=ssh");
else
strncpy(rsync_flags, options.rsync_options, QUERY_STR_LEN);
maxlen_snprintf(rsync_flags, "%s", options.rsync_options);
if (runtime_options.force)
strcat(rsync_flags, " --delete");
if (remote_user == NULL)
{
snprintf(host_string, QUERY_STR_LEN, "%s",host);
maxlen_snprintf(host_string, "%s", host);
}
else
{
snprintf(host_string, QUERY_STR_LEN, "%s@%s",remote_user,host);
maxlen_snprintf(host_string,"%s@%s",remote_user,host);
}
if (is_directory)
{
strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid");
snprintf(script, QUERY_STR_LEN, "rsync %s %s:%s/* %s",
maxlen_snprintf(script, "rsync %s %s:%s/* %s",
rsync_flags, host_string, remote_path, local_path);
}
else
{
snprintf(script, QUERY_STR_LEN, "rsync %s %s:%s %s/.",
maxlen_snprintf(script, "rsync %s %s:%s %s/.",
rsync_flags, host_string, remote_path, local_path);
}
@@ -1450,13 +1554,26 @@ check_parameters_for_action(const int action)
break;
case STANDBY_CLONE:
/*
* To clone a master into a standby we need connection parameters
* repmgr.conf is useless because we don't have a server running
* in the standby
* Issue a friendly notice that the configuration file is neither
* necessary nor read at all when performing a STANDBY CLONE
* action.
*/
if (runtime_options.config_file[0])
{
log_notice("Only command line parameters for the connection to the master are used when issuing a STANDBY CLONE command.\n");
log_notice("Only command line parameters for the connection "
"to the master are used when issuing a STANDBY CLONE command. "
"The passed configuration file is neither required nor used\n");
}
/*
* To clone a master into a standby we need connection parameters
* repmgr.conf is useless because we don't have a server running in
* the standby; warn the user, but keep going.
*/
if (runtime_options.host == NULL)
{
log_notice("You need to use connection parameters to "
"the master when issuing a STANDBY CLONE command.");
ok = false;
}
need_a_node = false;
break;

View File

@@ -24,15 +24,12 @@
#include "getopt_long.h"
#include "libpq-fe.h"
#include "strutil.h"
#include "dbutils.h"
#define PRIMARY_MODE 0
#define STANDBY_MODE 1
#define MAXLEN 80
#define MAXVERSIONSTR 16
#define QUERY_STR_LEN 8192
#include "config.h"
#define MAXFILENAME 1024
#define MAXLINELENGTH 4096
@@ -56,6 +53,7 @@
#define ERR_DB_CON 6
#define ERR_DB_QUERY 7
#define ERR_PROMOTED 8
#define ERR_BAD_PASSWORD 9
/* Run time options type */
typedef struct

View File

@@ -1,3 +1,10 @@
/*
* repmgr.sql
*
* Copyright (C) 2ndQuadrant, 2011
*
*/
CREATE USER repmgr;
CREATE SCHEMA repmgr;
@@ -5,27 +12,25 @@ CREATE SCHEMA repmgr;
* The table repl_nodes keeps information about all machines in
* a cluster
*/
drop table if exists repl_nodes cascade;
CREATE TABLE repl_nodes (
id integer primary key,
cluster text not null, -- Name to identify the cluster
conninfo text not null
id integer primary key,
cluster text not null, -- Name to identify the cluster
conninfo text not null
);
ALTER TABLE repl_nodes OWNER TO repmgr;
/*
* Keeps monitor info about every node and their relative "position"
* Keeps monitor info about every node and their relative "position"
* to primary
*/
drop table if exists repl_monitor cascade;
CREATE TABLE repl_monitor (
primary_node INTEGER NOT NULL,
standby_node INTEGER NOT NULL,
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
last_wal_primary_location TEXT NOT NULL,
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
last_wal_primary_location TEXT NOT NULL,
last_wal_standby_location TEXT NOT NULL,
replication_lag BIGINT NOT NULL,
apply_lag BIGINT NOT NULL
replication_lag BIGINT NOT NULL,
apply_lag BIGINT NOT NULL
);
ALTER TABLE repl_monitor OWNER TO repmgr;
@@ -33,21 +38,20 @@ ALTER TABLE repl_monitor OWNER TO repmgr;
/*
* This view shows the latest monitor info about every node.
* Interesting thing to see:
* replication_lag: in bytes (this is how far the latest xlog record
* replication_lag: in bytes (this is how far the latest xlog record
* we have received is from master)
* apply_lag: in bytes (this is how far the latest xlog record
* we have applied is from the latest record we
* we have applied is from the latest record we
* have received)
* time_lag: how many seconds are we from being up-to-date with master
*/
drop view if exists repl_status;
CREATE VIEW repl_status AS
WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node
ORDER BY last_monitor_time desc)
FROM repl_monitor)
SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag,
pg_size_pretty(apply_lag) apply_lag,
SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location,
last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag,
pg_size_pretty(apply_lag) apply_lag,
age(now(), last_monitor_time) AS time_lag
FROM monitor_info a
WHERE row_number = 1;

View File

@@ -29,6 +29,7 @@
#include "repmgr.h"
#include "config.h"
#include "log.h"
#include "strutil.h"
#include "libpq/pqsignal.h"
@@ -40,6 +41,7 @@ PGconn *myLocalConn = NULL;
/* Primary info */
t_configuration_options primary_options;
PGconn *primaryConn = NULL;
char sqlquery[QUERY_STR_LEN];
@@ -52,7 +54,8 @@ char repmgr_schema[MAXLEN];
/*
* should initialize with {0} to be ANSI compliant ? but this raises
* error with gcc -Wall */
* error with gcc -Wall
*/
t_configuration_options config = {};
static void help(const char* progname);
@@ -69,22 +72,22 @@ static void handle_sigint(SIGNAL_ARGS);
static void setup_cancel_handler(void);
#define CloseConnections() \
if (PQisBusy(primaryConn) == 1) \
CancelQuery(); \
if (myLocalConn != NULL) \
PQfinish(myLocalConn); \
if (primaryConn != NULL && primaryConn != myLocalConn) \
PQfinish(primaryConn);
if (PQisBusy(primaryConn) == 1) \
CancelQuery(); \
if (myLocalConn != NULL) \
PQfinish(myLocalConn); \
if (primaryConn != NULL && primaryConn != myLocalConn) \
PQfinish(primaryConn);
/*
* Every 3 seconds, insert monitor info
*/
#define MonitorCheck() \
for (;;) \
{ \
MonitorExecute(); \
sleep(3); \
}
#define MonitorCheck() \
for (;;) \
{ \
MonitorExecute(); \
sleep(3); \
}
int
@@ -175,7 +178,8 @@ main(int argc, char **argv)
else
{
/* I need the id of the primary as well as a connection to it */
primaryConn = getMasterConnection(myLocalConn, local_options.node, local_options.cluster_name, &primary_options.node);
primaryConn = getMasterConnection(myLocalConn, local_options.node,
local_options.cluster_name, &primary_options.node,NULL);
if (primaryConn == NULL)
exit(ERR_BAD_CONFIG);
}
@@ -187,6 +191,10 @@ main(int argc, char **argv)
MonitorCheck();
}
/* Prevent a double-free */
if (primaryConn == myLocalConn)
myLocalConn = NULL;
/* close the connection to the database and cleanup */
CloseConnections();
@@ -245,7 +253,8 @@ MonitorExecute(void)
log_err(_("We couldn't reconnect to master. Now checking if another node has been promoted."));
for (connection_retries = 0; connection_retries < 6; connection_retries++)
{
primaryConn = getMasterConnection(myLocalConn, local_options.node, local_options.cluster_name, &primary_options.node);
primaryConn = getMasterConnection(myLocalConn, local_options.node,
local_options.cluster_name, &primary_options.node,NULL);
if (PQstatus(primaryConn) == CONNECTION_OK)
{
/* Connected, we can continue the process so break the loop */
@@ -283,9 +292,10 @@ MonitorExecute(void)
CancelQuery();
/* Get local xlog info */
snprintf(sqlquery, QUERY_STR_LEN,
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
"pg_last_xlog_replay_location()");
sqlquery_snprintf(
sqlquery,
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
"pg_last_xlog_replay_location()");
res = PQexec(myLocalConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -302,7 +312,7 @@ MonitorExecute(void)
PQclear(res);
/* Get primary xlog info */
snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_current_xlog_location() ");
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() ");
res = PQexec(primaryConn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -323,8 +333,8 @@ MonitorExecute(void)
/*
* Build the SQL to execute on primary
*/
snprintf(sqlquery,
QUERY_STR_LEN, "INSERT INTO %s.repl_monitor "
sqlquery_snprintf(sqlquery,
"INSERT INTO %s.repl_monitor "
"VALUES(%d, %d, '%s'::timestamp with time zone, "
" '%s', '%s', "
" %lld, %lld)", repmgr_schema,
@@ -349,7 +359,7 @@ checkClusterConfiguration(void)
{
PGresult *res;
snprintf(sqlquery, QUERY_STR_LEN, "SELECT oid FROM pg_class "
sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class "
" WHERE oid = '%s.repl_nodes'::regclass",
repmgr_schema);
res = PQexec(myLocalConn, sqlquery);
@@ -363,8 +373,10 @@ checkClusterConfiguration(void)
}
/*
* If there isn't any results then we have not configured a primary node yet
* in repmgr or the connection string is pointing to the wrong database.
* If there isn't any results then we have not configured a primary node
* yet in repmgr or the connection string is pointing to the wrong
* database.
*
* XXX if we are the primary, should we try to create the tables needed?
*/
if (PQntuples(res) == 0)
@@ -387,7 +399,7 @@ checkNodeConfiguration(char *conninfo)
/*
* Check if we have my node information in repl_nodes
*/
snprintf(sqlquery, QUERY_STR_LEN, "SELECT * FROM %s.repl_nodes "
sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes "
" WHERE id = %d AND cluster = '%s' ",
repmgr_schema, local_options.node, local_options.cluster_name);
@@ -408,8 +420,9 @@ checkNodeConfiguration(char *conninfo)
if (PQntuples(res) == 0)
{
PQclear(res);
/* Adding the node */
snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes "
sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes "
"VALUES (%d, '%s', '%s')",
repmgr_schema, local_options.node, local_options.cluster_name, local_options.conninfo);

72
strutil.c Normal file
View File

@@ -0,0 +1,72 @@
/*
* strutil.c
*
* Copyright (C) 2ndQuadrant, 2011
*
*/
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "strutil.h"
static int xvsnprintf(char *str, size_t size, const char *format, va_list ap);
/*
 * xvsnprintf - vsnprintf() that treats truncation as a fatal error.
 *
 * Formats into str, whose capacity is size bytes including the terminating
 * NUL.  If vsnprintf() reports an error, or the formatted text does not fit
 * in size bytes, a message is printed on stderr and the process exits with
 * status 255, so a caller never gets a truncated string back.
 *
 * Returns the number of characters written, not counting the NUL.
 */
static int
xvsnprintf(char *str, size_t size, const char *format, va_list ap)
{
	int			retval;

	retval = vsnprintf(str, size, format, ap);

	/*
	 * A negative result is a formatting/encoding error.  It has to be tested
	 * on its own: compared directly against the unsigned size it would be
	 * converted to a huge value and misreported as a too-small buffer.
	 */
	if (retval < 0)
	{
		fprintf(stderr, "Error while formatting string\n");
		exit(255);
	}

	/* retval is the length that was needed; it must leave room for the NUL */
	if ((size_t) retval >= size)
	{
		fprintf(stderr, "Buffer not large enough to format entire string\n");
		exit(255);
	}

	return retval;
}

/*
 * xsnprintf - snprintf() replacement that never truncates silently.
 *
 * Takes the same arguments as snprintf().  Exits the process with status
 * 255 (see xvsnprintf) when the result does not fit in size bytes.
 *
 * Returns the number of characters written, not counting the NUL.
 */
int
xsnprintf(char *str, size_t size, const char *format, ...)
{
	va_list		arglist;
	int			retval;

	va_start(arglist, format);
	retval = xvsnprintf(str, size, format, arglist);
	va_end(arglist);

	return retval;
}
/*
 * sqlquery_snprintf - format an SQL statement into a query buffer.
 *
 * Works like xsnprintf() with the size fixed at QUERY_STR_LEN, so str must
 * point to a buffer of at least QUERY_STR_LEN bytes.
 *
 * Returns the value handed back by xvsnprintf().
 */
int
sqlquery_snprintf(char *str, const char *format, ...)
{
	va_list		args;
	int			len;

	va_start(args, format);
	len = xvsnprintf(str, QUERY_STR_LEN, format, args);
	va_end(args);

	return len;
}
/*
 * maxlen_snprintf - format a string into a MAXLEN-sized buffer.
 *
 * Works like xsnprintf() with the size fixed at MAXLEN, so str must point
 * to a buffer of at least MAXLEN bytes.
 *
 * Returns the value handed back by xvsnprintf().
 */
int
maxlen_snprintf(char *str, const char *format, ...)
{
	va_list		args;
	int			len;

	va_start(args, format);
	len = xvsnprintf(str, MAXLEN, format, args);
	va_end(args);

	return len;
}

25
strutil.h Normal file
View File

@@ -0,0 +1,25 @@
/*
* strutil.h
*
* Copyright (C) 2ndQuadrant, 2010-2011
*
*/
/*
 * Include guard.  Deliberately not spelled with a leading underscore
 * followed by an uppercase letter: such identifiers are reserved for the
 * C implementation.
 */
#ifndef STRUTIL_H
#define STRUTIL_H

#include <stdlib.h>

/* Buffer size that sqlquery_snprintf() formats into */
#define QUERY_STR_LEN	8192
/* Buffer size that maxlen_snprintf() formats into */
#define MAXLEN			1024
/* Further fixed sizes shared by the programs that include this header */
#define MAXLINELENGTH	4096
#define MAXVERSIONSTR	16
#define MAXCONNINFO		1024

/*
 * snprintf()-style formatters.  xsnprintf() takes the destination size
 * explicitly; sqlquery_snprintf() and maxlen_snprintf() use QUERY_STR_LEN
 * and MAXLEN respectively, so str must be at least that large.  All three
 * terminate the process instead of returning a truncated string.
 */
extern int xsnprintf(char *str, size_t size, const char *format, ...);
extern int sqlquery_snprintf(char *str, const char *format, ...);
extern int maxlen_snprintf(char *str, const char *format, ...);

#endif   /* STRUTIL_H */

13
uninstall_repmgr.sql Normal file
View File

@@ -0,0 +1,13 @@
/*
* uninstall_repmgr.sql
*
* Copyright (C) 2ndQuadrant, 2010-2011
*
*/
/*
 * Drop the view before the tables: repl_status selects from repl_monitor,
 * and DROP TABLE without CASCADE refuses to remove a table that a view
 * still depends on.
 */
DROP VIEW IF EXISTS repl_status;
DROP TABLE IF EXISTS repl_monitor;
DROP TABLE IF EXISTS repl_nodes;

/* Remove the schema and role created by repmgr.sql */
DROP SCHEMA repmgr;
DROP USER repmgr;