diff --git a/CREDITS b/CREDITS index d2b17e6b..78e707b8 100644 --- a/CREDITS +++ b/CREDITS @@ -9,3 +9,4 @@ Bas van Oostveen Hannu Krosing Cédric Villemain Charles Duffy +Daniel Farina diff --git a/Makefile b/Makefile index 60839386..d225da13 100644 --- a/Makefile +++ b/Makefile @@ -2,8 +2,10 @@ # Makefile # Copyright (c) 2ndQuadrant, 2010-2011 -repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o -repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o +repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o +repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o + +DATA = repmgr.sql uninstall_repmgr.sql PG_CPPFLAGS = -I$(libpq_srcdir) PG_LIBS = $(libpq_pgport) @@ -26,10 +28,19 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now +# is overriding pgxs install. install: $(INSTALL_PROGRAM) repmgrd$(X) '$(DESTDIR)$(bindir)' $(INSTALL_PROGRAM) repmgr$(X) '$(DESTDIR)$(bindir)' +ifneq (,$(DATA)$(DATA_built)) + @for file in $(addprefix $(srcdir)/, $(DATA)) $(DATA_built); do \ + echo "$(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'"; \ + $(INSTALL_DATA) $$file '$(DESTDIR)$(datadir)/$(datamoduledir)'; \ + done +endif + clean: rm -f *.o rm -f repmgrd diff --git a/README.rst b/README.rst index 87795255..d2b3ad2b 100644 --- a/README.rst +++ b/README.rst @@ -555,6 +555,7 @@ following * ERR_DB_CON 6: Error when trying to connect to a database. * ERR_DB_QUERY 7: Error executing a database query. * ERR_PROMOTED 8: Exiting program because the node has been promoted to master. +* ERR_BAD_PASSWORD 9: Password used to connect to a database was rejected. 
Detailed walkthrough ==================== diff --git a/check_dir.c b/check_dir.c index 828e0ed6..e0d0953f 100644 --- a/check_dir.c +++ b/check_dir.c @@ -1,6 +1,6 @@ /* * check_dir.c - Directories management functions - * Copyright (C) 2ndQuadrant, 2011 + * Copyright (C) 2ndQuadrant, 2010-2011 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -24,9 +24,12 @@ #include #include +/* NB: postgres_fe must be included BEFORE check_dir */ #include "postgres_fe.h" #include "check_dir.h" +#include "strutil.h" + static int mkdir_p(char *path, mode_t omode); @@ -64,7 +67,7 @@ check_dir(char *dir) } else { - result = 2; /* not empty */ + result = 2; /* not empty */ break; } } @@ -111,7 +114,7 @@ set_directory_permissions(char *dir) /* function from initdb.c */ -/* source stolen from FreeBSD /src/bin/mkdir/mkdir.c and adapted */ +/* source adapted from FreeBSD /src/bin/mkdir/mkdir.c */ /* * this tries to build all the elements of a path to a directory a la mkdir -p @@ -219,10 +222,11 @@ mkdir_p(char *path, mode_t omode) bool is_pg_dir(char *dir) { - char path[8192]; - struct stat sb; + const size_t buf_sz = 8192; + char path[buf_sz]; + struct stat sb; - sprintf(path, "%s/PG_VERSION", dir); + xsnprintf(path, buf_sz, "%s/PG_VERSION", dir); return (stat(path, &sb) == 0) ? 
true : false; } diff --git a/config.c b/config.c index 009aee84..f1b980b5 100644 --- a/config.c +++ b/config.c @@ -18,6 +18,8 @@ */ #include "config.h" +#include "repmgr.h" +#include "strutil.h" void parse_config(const char* config_file, t_configuration_options* options) diff --git a/config.h b/config.h index 27646c65..14af2ca5 100644 --- a/config.h +++ b/config.h @@ -21,6 +21,7 @@ #define _REPMGR_CONFIG_H_ #include "repmgr.h" +#include "strutil.h" typedef struct { diff --git a/dbutils.c b/dbutils.c index 336f0a05..18fc166d 100644 --- a/dbutils.c +++ b/dbutils.c @@ -19,8 +19,7 @@ #include "repmgr.h" -#define MAXQUERY 8192 -#define MAXCONNINFO 1024 +#include "strutil.h" PGconn * establishDBConnection(const char *conninfo, const bool exit_on_error) @@ -33,6 +32,7 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) { fprintf(stderr, "Connection to database failed: %s", PQerrorMessage(conn)); + if (exit_on_error) { PQfinish(conn); @@ -44,7 +44,6 @@ establishDBConnection(const char *conninfo, const bool exit_on_error) } - bool is_standby(PGconn *conn) { @@ -52,6 +51,7 @@ is_standby(PGconn *conn) bool result; res = PQexec(conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "Can't query server mode: %s", PQerrorMessage(conn)); @@ -79,11 +79,15 @@ pg_version(PGconn *conn, char* major_version) { PGresult *res; - int major_version1; - char *major_version2; + int major_version1; + char *major_version2; + + res = PQexec(conn, + "WITH pg_version(ver) AS " + "(SELECT split_part(version(), ' ', 2)) " + "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) " + "FROM pg_version"); - res = PQexec(conn, "WITH pg_version(ver) AS (SELECT split_part(version(), ' ', 2)) " - "SELECT split_part(ver, '.', 1), split_part(ver, '.', 2) FROM pg_version"); if (PQresultStatus(res) != PGRES_TUPLES_OK) { fprintf(stderr, "PQexec failed: %s", PQerrorMessage(conn)); @@ -91,31 +95,35 @@ pg_version(PGconn *conn, char* 
major_version) PQfinish(conn); exit(ERR_DB_QUERY); } + major_version1 = atoi(PQgetvalue(res, 0, 0)); major_version2 = PQgetvalue(res, 0, 1); - PQclear(res); if (major_version1 >= 9) { /* form a major version string */ - snprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, major_version2); + xsnprintf(major_version, MAXVERSIONSTR, "%d.%s", major_version1, + major_version2); } else strcpy(major_version, ""); + PQclear(res); + return major_version; } bool -guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value) +guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value) { PGresult *res; - char sqlquery[MAXQUERY]; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT true FROM pg_settings " - " WHERE name = '%s' AND setting %s '%s'", - parameter, op, value); + sqlquery_snprintf(sqlquery, "SELECT true FROM pg_settings " + " WHERE name = '%s' AND setting %s '%s'", + parameter, op, value); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -140,11 +148,13 @@ const char * get_cluster_size(PGconn *conn) { PGresult *res; - const char *size; - char sqlquery[MAXQUERY]; + const char *size; + char sqlquery[QUERY_STR_LEN]; - sprintf(sqlquery, "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " - " FROM pg_database "); + sqlquery_snprintf( + sqlquery, + "SELECT pg_size_pretty(SUM(pg_database_size(oid))::bigint) " + " FROM pg_database "); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -162,26 +172,57 @@ get_cluster_size(PGconn *conn) /* * get a connection to master by reading repl_nodes, creating a connection * to each node (one at a time) and finding if it is a master or a standby + * + * NB: master_conninfo_out may be NULL. If it is non-NULL, it is assumed to + * point to allocated memory of MAXCONNINFO in length, and the master server + * connection string is placed there. 
*/ PGconn * -getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) +getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out) { - PGconn *master_conn = NULL; - PGresult *res1; - PGresult *res2; - char sqlquery[MAXQUERY]; - char master_conninfo[MAXCONNINFO]; + PGconn *master_conn = NULL; + PGresult *res1; + PGresult *res2; + char sqlquery[QUERY_STR_LEN]; + char master_conninfo_stack[MAXCONNINFO]; + char *master_conninfo = &*master_conninfo_stack; + char schema_str[MAXLEN]; + char schema_quoted[MAXLEN]; + int i; + /* + * If the caller wanted to get a copy of the connection info string, sub + * out the local stack pointer for the pointer passed by the caller. + */ + if (master_conninfo_out != NULL) + master_conninfo = master_conninfo_out; + + /* + * XXX: This is copied in at least two other procedures + * + * Assemble the unquoted schema name + */ + maxlen_snprintf(schema_str, "repmgr_%s", cluster); + { + char *identifier = PQescapeIdentifier(standby_conn, schema_str, + strlen(schema_str)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* find all nodes belonging to this cluster */ - sprintf(sqlquery, "SELECT * FROM repmgr_%s.repl_nodes " - " WHERE cluster = '%s' and id <> %d", - cluster, cluster, id); + sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " + " WHERE cluster = '%s' and id <> %d", + schema_quoted, cluster, id); res1 = PQexec(standby_conn, sqlquery); if (PQresultStatus(res1) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get nodes info: %s\n", PQerrorMessage(standby_conn)); + fprintf(stderr, "Can't get nodes info: %s\n", + PQerrorMessage(standby_conn)); PQclear(res1); PQfinish(standby_conn); exit(ERR_DB_QUERY); @@ -193,18 +234,21 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) *master_id = atoi(PQgetvalue(res1, i, 0)); strncpy(master_conninfo, PQgetvalue(res1, i, 2), MAXCONNINFO); master_conn = 
establishDBConnection(master_conninfo, false); + if (PQstatus(master_conn) != CONNECTION_OK) continue; /* * I can't use the is_standby() function here because on error that - * function closes the connection i pass and exit, but i still need to close - * standby_conn + * function closes the connection I pass and exits, but I still need to + * close standby_conn */ res2 = PQexec(master_conn, "SELECT pg_is_in_recovery()"); + if (PQresultStatus(res2) != PGRES_TUPLES_OK) { - fprintf(stderr, "Can't get recovery state from this node: %s\n", PQerrorMessage(master_conn)); + fprintf(stderr, "Can't get recovery state from this node: %s\n", + PQerrorMessage(master_conn)); PQclear(res2); PQfinish(master_conn); continue; @@ -229,7 +273,8 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) /* If we finish this loop without finding a master then * we doesn't have the info or the master has failed (or we * reached max_connections or superuser_reserved_connections, - * anything else i'm missing?), + * anything else I'm missing?). + * * Probably we will need to check the error to know if we need * to start failover procedure or just fix some situation on the * standby. 
@@ -237,4 +282,3 @@ getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id) PQclear(res1); return NULL; } - diff --git a/dbutils.h b/dbutils.h index 23ed4afe..45d0ea96 100644 --- a/dbutils.h +++ b/dbutils.h @@ -21,10 +21,12 @@ #define _REPMGR_DBUTILS_H_ PGconn *establishDBConnection(const char *conninfo, const bool exit_on_error); -bool is_standby(PGconn *conn); +bool is_standby(PGconn *conn); char *pg_version(PGconn *conn, char* major_version); -bool guc_setted(PGconn *conn, const char *parameter, const char *op, const char *value); -const char *get_cluster_size(PGconn *conn); -PGconn * getMasterConnection(PGconn *standby_conn, int id, char *cluster, int *master_id); +bool guc_setted(PGconn *conn, const char *parameter, const char *op, + const char *value); +const char *get_cluster_size(PGconn *conn); +PGconn *getMasterConnection(PGconn *standby_conn, int id, char *cluster, + int *master_id, char *master_conninfo_out); #endif diff --git a/repmgr.c b/repmgr.c index 395c7c7c..5fda40ab 100644 --- a/repmgr.c +++ b/repmgr.c @@ -34,20 +34,22 @@ #include "log.h" #include "config.h" #include "check_dir.h" +#include "strutil.h" #define RECOVERY_FILE "recovery.conf" #define RECOVERY_DONE_FILE "recovery.done" -#define NO_ACTION 0 /* Not a real action, just to initialize */ -#define MASTER_REGISTER 1 +#define NO_ACTION 0 /* Not a real action, just to initialize */ +#define MASTER_REGISTER 1 #define STANDBY_REGISTER 2 #define STANDBY_CLONE 3 #define STANDBY_PROMOTE 4 #define STANDBY_FOLLOW 5 static void help(const char *progname); -static bool create_recovery_file(const char *data_dir); -static int copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory); +static bool create_recovery_file(const char *data_dir, char *master_conninfo); +static int copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory); static bool check_parameters_for_action(const int 
action); static void do_master_register(void); @@ -111,7 +113,8 @@ main(int argc, char **argv) } - while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, &optindex)) != -1) + while ((c = getopt_long(argc, argv, "d:h:p:U:D:f:R:w:F:v", long_options, + &optindex)) != -1) { switch (c) { @@ -164,7 +167,8 @@ main(int argc, char **argv) if (optind < argc) { server_mode = argv[optind++]; - if (strcasecmp(server_mode, "STANDBY") != 0 && strcasecmp(server_mode, "MASTER") != 0) + if (strcasecmp(server_mode, "STANDBY") != 0 && + strcasecmp(server_mode, "MASTER") != 0) { usage(); exit(ERR_BAD_CONFIG); @@ -240,36 +244,18 @@ main(int argc, char **argv) } /* - * Read the configuration file: repmgr.conf + * Read the configuration file: repmgr.conf, but only if we're not doing a + * STANDBY CLONE action: it is not necessary to have the configuration file + * in that case. */ - parse_config(runtime_options.config_file, &options); - if (options.node == -1) + if (action != STANDBY_CLONE) { - fprintf(stderr, "Node information is missing. 
" - "Check the configuration file.\n"); - exit(ERR_BAD_CONFIG); + if (runtime_options.verbose) + printf(_("Opening configuration file: %s\n"), runtime_options.config_file); + + parse_config(runtime_options.config_file, &options); } - keywords[2] = "user"; - values[2] = runtime_options.username; - keywords[3] = "dbname"; - values[3] = runtime_options.dbname; - keywords[4] = "application_name"; - values[4] = (char *) progname; - keywords[5] = NULL; - values[5] = NULL; - - if (!runtime_options.config_file[0]) - strncpy(runtime_options.config_file, DEFAULT_CONFIG_FILE, MAXLEN); - - /* - * Read the configuration file: repmgr.conf - */ - if (runtime_options.verbose) - printf(_("Opening configuration file: %s\n"), runtime_options.config_file); - - parse_config(runtime_options.config_file, &options); - if (need_a_node) { @@ -317,11 +303,12 @@ main(int argc, char **argv) static void do_master_register(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; bool schema_exists = false; + char schema_quoted[MAXLEN]; char master_version[MAXVERSIONSTR]; conn = establishDBConnection(options.conninfo, true); @@ -343,8 +330,22 @@ do_master_register(void) exit(ERR_BAD_CONFIG); } + /* Assemble a quoted schema name + * XXX This is not currently used due to a merge conflict, but + * probably should be */ + if (false) + { + char *identifier = PQescapeIdentifier(conn, repmgr_schema, + strlen(repmgr_schema)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* Check if there is a schema for this cluster */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema); + sqlquery_snprintf(sqlquery, + "SELECT 1 FROM pg_namespace " + "WHERE nspname = '%s'", repmgr_schema); log_debug("master register: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -373,7 +374,7 @@ do_master_register(void) log_info("master register: creating database objects 
inside the %s schema", repmgr_schema); /* ok, create the schema */ - snprintf(sqlquery, QUERY_STR_LEN, "CREATE SCHEMA %s", repmgr_schema); + sqlquery_snprintf(sqlquery, "CREATE SCHEMA %s", repmgr_schema); log_debug("master register: %s\n", sqlquery); if (!PQexec(conn, sqlquery)) { @@ -384,7 +385,7 @@ do_master_register(void) } /* ... the tables */ - snprintf(sqlquery, QUERY_STR_LEN, "CREATE TABLE %s.repl_nodes ( " + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_nodes ( " " id integer primary key, " " cluster text not null, " " conninfo text not null)", repmgr_schema); @@ -397,7 +398,7 @@ do_master_register(void) exit(ERR_BAD_CONFIG); } - snprintf(sqlquery, QUERY_STR_LEN, "CREATE TABLE %s.repl_monitor ( " + sqlquery_snprintf(sqlquery, "CREATE TABLE %s.repl_monitor ( " " primary_node INTEGER NOT NULL, " " standby_node INTEGER NOT NULL, " " last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, " @@ -415,7 +416,7 @@ do_master_register(void) } /* and the view */ - snprintf(sqlquery, QUERY_STR_LEN, "CREATE VIEW %s.repl_status AS " + sqlquery_snprintf(sqlquery, "CREATE VIEW %s.repl_status AS " " WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node " " ORDER BY last_monitor_time desc) " " FROM %s.repl_monitor) " @@ -436,10 +437,11 @@ do_master_register(void) else { PGconn *master_conn; - int id; + int id; /* Ensure there isn't any other master already registered */ - master_conn = getMasterConnection(conn, options.node, options.cluster_name, &id); + master_conn = getMasterConnection(conn, options.node, + options.cluster_name, &id,NULL); if (master_conn != NULL) { PQfinish(master_conn); @@ -451,7 +453,7 @@ do_master_register(void) /* Now register the master */ if (runtime_options.force) { - snprintf(sqlquery, QUERY_STR_LEN, "DELETE FROM %s.repl_nodes " + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " " WHERE id = %d", repmgr_schema, options.node); log_debug("master register: %s\n", sqlquery); @@ -465,7 +467,7 @@ 
do_master_register(void) } } - snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " "VALUES (%d, '%s', '%s')", repmgr_schema, options.node, options.cluster_name, options.conninfo); log_debug("master register: %s\n", sqlquery); @@ -488,18 +490,21 @@ do_master_register(void) static void do_standby_register(void) { - PGconn *conn; + PGconn *conn; PGconn *master_conn; int master_id; PGresult *res; char sqlquery[QUERY_STR_LEN]; + char schema_quoted[MAXLEN]; char master_version[MAXVERSIONSTR]; char standby_version[MAXVERSIONSTR]; conn = establishDBConnection(options.conninfo, true); + /* XXX: A lot of copied code from do_master_register! Refactor */ + /* should be v9 or better */ pg_version(conn, standby_version); if (strcmp(standby_version, "") == 0) @@ -517,8 +522,20 @@ do_standby_register(void) exit(ERR_BAD_CONFIG); } + /* Assemble a quoted schema name + * XXX This is not currently used due to a merge conflict, but + * probably should be */ + if (false) + { + char *identifier = PQescapeIdentifier(conn, repmgr_schema, + strlen(repmgr_schema)); + + maxlen_snprintf(schema_quoted, "%s", identifier); + PQfreemem(identifier); + } + /* Check if there is a schema for this cluster */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema); + sqlquery_snprintf(sqlquery, "SELECT 1 FROM pg_namespace WHERE nspname = '%s'", repmgr_schema); log_debug("standby register: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -529,8 +546,9 @@ do_standby_register(void) exit(ERR_BAD_CONFIG); } - if (PQntuples(res) == 0) /* schema doesn't exist */ + if (PQntuples(res) == 0) { + /* schema doesn't exist */ log_err("Schema %s doesn't exists.\n", repmgr_schema); PQclear(res); PQfinish(conn); @@ -539,7 +557,8 @@ do_standby_register(void) PQclear(res); /* check if there is a master in this cluster */ - master_conn = getMasterConnection(conn, 
options.node, options.cluster_name, &master_id); + master_conn = getMasterConnection(conn, options.node, options.cluster_name, + &master_id, NULL); if (!master_conn) { log_err(_("A master must be defined before configuring a slave\n")); @@ -570,7 +589,7 @@ do_standby_register(void) /* Now register the standby */ if (runtime_options.force) { - snprintf(sqlquery, QUERY_STR_LEN, "DELETE FROM %s.repl_nodes " + sqlquery_snprintf(sqlquery, "DELETE FROM %s.repl_nodes " " WHERE id = %d", repmgr_schema, options.node); log_debug("standby register: %s\n", sqlquery); @@ -585,7 +604,7 @@ do_standby_register(void) } } - snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " "VALUES (%d, '%s', '%s')", repmgr_schema, options.node, options.cluster_name, options.conninfo); log_debug("standby register: %s\n", sqlquery); @@ -608,9 +627,9 @@ do_standby_register(void) static void do_standby_clone(void) { - PGconn *conn; + PGconn *conn; PGresult *res; - char sqlquery[QUERY_STR_LEN]; + char sqlquery[QUERY_STR_LEN]; int r = 0; int i; @@ -624,8 +643,8 @@ do_standby_clone(void) char local_control_file[MAXFILENAME]; char tblspc_dir[MAXFILENAME]; - const char *first_wal_segment = NULL; - const char *last_wal_segment = NULL; + char *first_wal_segment = NULL; + const char *last_wal_segment = NULL; char master_version[MAXVERSIONSTR]; @@ -745,8 +764,14 @@ do_standby_clone(void) log_info(_("Succesfully connected to primary. Current installation size is %s\n"), get_cluster_size(conn)); - /* Check if the tablespace locations exists and that we can write to them */ - snprintf(sqlquery, QUERY_STR_LEN, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + /* + * Check if the tablespace locations exists and that we can write to + * them. 
+ */ + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + "WHERE spcname NOT IN ('pg_default', 'pg_global')"); log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); @@ -798,9 +823,10 @@ do_standby_clone(void) /* Present and not empty */ if (!runtime_options.force) { - fprintf(stderr, - _("%s: directory \"%s\" exists but is not empty\n"), - progname, tblspc_dir); + fprintf( + stderr, + _("%s: directory \"%s\" exists but is not empty\n"), + progname, tblspc_dir); PQclear(res); PQfinish(conn); exit(ERR_BAD_CONFIG); @@ -818,9 +844,10 @@ do_standby_clone(void) log_notice("Starting backup...\n"); /* Get the data directory full path and the configuration files location */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT name, setting " - " FROM pg_settings " - " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); + sqlquery_snprintf(sqlquery, + "SELECT name, setting " + " FROM pg_settings " + " WHERE name IN ('data_directory', 'config_file', 'hba_file', 'ident_file')"); log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -849,9 +876,11 @@ do_standby_clone(void) * inform the master we will start a backup and get the first XLog filename * so we can say to the user we need those files */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", time(NULL)); + sqlquery_snprintf( + sqlquery, + "SELECT pg_xlogfile_name(pg_start_backup('repmgr_standby_clone_%ld'))", + time(NULL)); log_debug("standby clone: %s\n", sqlquery); - res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) { @@ -860,25 +889,36 @@ do_standby_clone(void) PQfinish(conn); exit(ERR_BAD_CONFIG); } - first_wal_segment = PQgetvalue(res, 0, 0); + + if (runtime_options.verbose) + { + char *first_wal_seg_pq = PQgetvalue(res, 0, 0); + size_t buf_sz = strlen(first_wal_seg_pq); + + first_wal_segment = malloc(buf_sz + 
1); + xsnprintf(first_wal_segment, buf_sz + 1, "%s", first_wal_seg_pq); + } + PQclear(res); /* * 1) first move global/pg_control * - * 2) then move data_directory ommiting the files we have already moved and pg_xlog - * content + * 2) then move data_directory omitting the files we have already moved and + * pg_xlog content * - * 3) finally We need to backup configuration files (that could be on other directories, debian - * like systems likes to do that), so look at config_file, hba_file and ident_file but we - * can omit external_pid_file ;) + * 3) finally we need to back up configuration files (that could be on other + * directories, Debian-like systems like to do that), so look at + * config_file, hba_file and ident_file but we can omit + * external_pid_file ;) * * On error we need to return but before that execute pg_stop_backup() */ /* need to create the global sub directory */ - snprintf(master_control_file, MAXFILENAME, "%s/global/pg_control", master_data_directory); - snprintf(local_control_file, MAXFILENAME, "%s/global", runtime_options.dest_dir); + maxlen_snprintf(master_control_file, "%s/global/pg_control", + master_data_directory); + maxlen_snprintf(local_control_file, "%s/global", runtime_options.dest_dir); if (!create_directory(local_control_file)) { log_err(_("%s: couldn't create directory %s ...\n"), @@ -887,7 +927,8 @@ do_standby_clone(void) } log_info("standby clone: master control file '%s'\n", master_control_file); - r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_control_file, local_control_file, false); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_control_file, local_control_file, false); if (r != 0) { log_warning("standby clone: failed copying master control file '%s'\n", master_control_file); @@ -895,7 +936,8 @@ do_standby_clone(void) } log_info("standby clone: master data directory '%s'\n", master_data_directory); - r = copy_remote_files(runtime_options.host, 
runtime_options.remote_user, master_data_directory, runtime_options.dest_dir, true); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_data_directory, runtime_options.dest_dir, true); if (r != 0) { log_warning("standby clone: failed copying master data directory '%s'\n", master_data_directory); @@ -903,10 +945,14 @@ do_standby_clone(void) } /* - * Copy tablespace locations, i'm doing this separately because i couldn't find and appropiate - * rsync option but besides we could someday make all these rsync happen concurrently + * Copy tablespace locations. I'm doing this separately because I couldn't + * find an appropriate rsync option; besides, we could someday make all + * these rsyncs happen concurrently */ - snprintf(sqlquery, QUERY_STR_LEN, "select spclocation from pg_tablespace where spcname not in ('pg_default', 'pg_global')"); + sqlquery_snprintf(sqlquery, + "SELECT spclocation " + " FROM pg_tablespace " + " WHERE spcname NOT IN ('pg_default', 'pg_global')"); log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -919,7 +965,8 @@ do_standby_clone(void) { strncpy(tblspc_dir, PQgetvalue(res, i, 0), MAXFILENAME); log_info("standby clone: master tablespace '%s'\n", tblspc_dir); - r = copy_remote_files(runtime_options.host, runtime_options.remote_user, tblspc_dir, tblspc_dir, true); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + tblspc_dir, tblspc_dir, true); if (r != 0) { log_warning("standby clone: failed copying tablespace directory '%s'\n", tblspc_dir); @@ -928,7 +975,8 @@ do_standby_clone(void) } log_info("standby clone: master config file '%s'\n", master_config_file); - r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_config_file, runtime_options.dest_dir, false); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_config_file, runtime_options.dest_dir, false); if (r 
!= 0) { log_warning("standby clone: failed copying master config file '%s'\n", master_config_file); @@ -944,7 +992,8 @@ do_standby_clone(void) } log_info("standby clone: master ident file '%s'\n", master_ident_file); - r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_ident_file, runtime_options.dest_dir, false); + r = copy_remote_files(runtime_options.host, runtime_options.remote_user, + master_ident_file, runtime_options.dest_dir, false); if (r != 0) { log_warning("standby clone: failed copying master ident file '%s'\n", master_ident_file); @@ -963,7 +1012,7 @@ stop_backup: log_notice("Finishing backup...\n"); - snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_xlogfile_name(pg_stop_backup())"); + sqlquery_snprintf(sqlquery, "SELECT pg_xlogfile_name(pg_stop_backup())"); log_debug("standby clone: %s\n", sqlquery); res = PQexec(conn, sqlquery); @@ -976,16 +1025,35 @@ stop_backup: } last_wal_segment = PQgetvalue(res, 0, 0); + if (runtime_options.verbose) + { + printf( + _("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); + + /* + * Only free the first_wal_segment since it was copied out of the + * pqresult. + */ + free(first_wal_segment); + first_wal_segment = NULL; + } + + PQclear(res); + PQfinish(conn); + /* If the rsync failed then exit */ if (r != 0) exit(ERR_BAD_RSYNC); - if (runtime_options.verbose) - printf(_("%s requires primary to keep WAL files %s until at least %s\n"), - progname, first_wal_segment, last_wal_segment); + log_info(_("%s requires primary to keep WAL files %s until at least %s\n"), + progname, first_wal_segment, last_wal_segment); - /* we need to create the pg_xlog sub directory too, i'm reusing a variable here */ - snprintf(local_control_file, MAXFILENAME, "%s/pg_xlog", runtime_options.dest_dir); + /* + * We need to create the pg_xlog sub directory too, I'm reusing a variable + * here. 
+ */ + maxlen_snprintf(local_control_file, "%s/pg_xlog", runtime_options.dest_dir); if (!create_directory(local_control_file)) { log_err(_("%s: couldn't create directory %s, you will need to do it manually...\n"), @@ -994,7 +1062,7 @@ stop_backup: } /* Finally, write the recovery.conf file */ - create_recovery_file(runtime_options.dest_dir); + create_recovery_file(runtime_options.dest_dir, NULL); PQclear(res); PQfinish(conn); @@ -1011,10 +1079,10 @@ stop_backup: static void do_standby_promote(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; + char script[MAXLEN]; PGconn *old_master_conn; int old_master_id; @@ -1046,7 +1114,8 @@ do_standby_promote(void) } /* we also need to check if there isn't any master already */ - old_master_conn = getMasterConnection(conn, options.node, options.cluster_name, &old_master_id); + old_master_conn = getMasterConnection(conn, options.node, options.cluster_name, + &old_master_id, NULL); if (old_master_conn != NULL) { PQfinish(old_master_conn); @@ -1058,7 +1127,7 @@ do_standby_promote(void) printf(_("\n%s: Promoting standby...\n"), progname); /* Get the data directory full path and the last subdirectory */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT setting " + sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); log_debug("standby promote: %s\n", sqlquery); res = PQexec(conn, sqlquery); @@ -1073,8 +1142,8 @@ do_standby_promote(void) PQclear(res); PQfinish(conn); - snprintf(recovery_file_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_FILE); - snprintf(recovery_done_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_DONE_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_done_path, "%s/%s", data_dir, RECOVERY_DONE_FILE); rename(recovery_file_path, recovery_done_path); /* @@ -1083,7 +1152,7 @@ do_standby_promote(void) * find an active server rather than one starting up. 
This may * hang for up the default timeout (60 seconds). */ - snprintf(script, QUERY_STR_LEN, "pg_ctl -D %s -w -m fast restart", data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -w -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1102,7 +1171,6 @@ do_standby_promote(void) log_err("\n%s: STANDBY PROMOTE successful. You should REINDEX any hash indexes you have.\n", progname); } PQfinish(conn); - return; } @@ -1110,11 +1178,11 @@ do_standby_promote(void) static void do_standby_follow(void) { - PGconn *conn; + PGconn *conn; PGresult *res; char sqlquery[QUERY_STR_LEN]; - char script[QUERY_STR_LEN]; - + char script[MAXLEN]; + char master_conninfo[MAXLEN]; PGconn *master_conn; int master_id; @@ -1145,7 +1213,8 @@ do_standby_follow(void) } /* we also need to check if there is any master in the cluster */ - master_conn = getMasterConnection(conn, options.node, options.cluster_name, &master_id); + master_conn = getMasterConnection(conn, options.node, + options.cluster_name, &master_id,(char *) &master_conninfo); if (master_conn == NULL) { PQfinish(conn); @@ -1190,11 +1259,10 @@ do_standby_follow(void) strncpy(runtime_options.masterport, PQport(master_conn), MAXLEN); PQfinish(master_conn); - if (runtime_options.verbose) - printf(_("\n%s: Changing standby's master...\n"), progname); + log_info(_("%s Changing standby's master"),progname); /* Get the data directory full path */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT setting " + sqlquery_snprintf(sqlquery, "SELECT setting " " FROM pg_settings WHERE name = 'data_directory'"); log_debug("standby follow: %s\n", sqlquery); res = PQexec(conn, sqlquery); @@ -1210,12 +1278,12 @@ do_standby_follow(void) PQfinish(conn); /* write the recovery.conf file */ - if (!create_recovery_file(data_dir)) + if (!create_recovery_file(data_dir,NULL)) exit(ERR_BAD_CONFIG); /* Finally, restart the service */ /* We assume the pg_ctl script is in the PATH */ - snprintf(script, QUERY_STR_LEN, "pg_ctl -D %s -m fast restart", 
data_dir); + maxlen_snprintf(script, "pg_ctl -D %s -m fast restart", data_dir); r = system(script); if (r != 0) { @@ -1238,44 +1306,50 @@ void help(const char *progname) { printf(_("\n%s: Replicator manager \n"), progname); printf(_("Usage:\n")); - printf(_(" %s [OPTIONS] master {register}\n"), progname); - printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), progname); + printf(_(" %s [OPTIONS] master {register}\n"), progname); + printf(_(" %s [OPTIONS] standby {register|clone|promote|follow}\n"), + progname); printf(_("\nGeneral options:\n")); - printf(_(" --help show this help, then exit\n")); - printf(_(" --version output version information, then exit\n")); - printf(_(" --verbose output verbose activity information\n")); + printf(_(" --help show this help, then exit\n")); + printf(_(" --version output version information, then exit\n")); + printf(_(" --verbose output verbose activity information\n")); printf(_("\nConnection options:\n")); - printf(_(" -d, --dbname=DBNAME database to connect to\n")); - printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); - printf(_(" -p, --port=PORT database server port\n")); - printf(_(" -U, --username=USERNAME database user name to connect as\n")); + printf(_(" -d, --dbname=DBNAME database to connect to\n")); + printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); + printf(_(" -p, --port=PORT database server port\n")); + printf(_(" -U, --username=USERNAME database user name to connect as\n")); printf(_("\nConfiguration options:\n")); - printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); - printf(_(" -f, --config_file=PATH path to the configuration file\n")); - printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); - printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); - printf(_(" -F, --force force potentially dangerous operations to happen\n")); + 
printf(_(" -D, --data-dir=DIR local directory where the files will be copied to\n")); + printf(_(" -f, --config_file=PATH path to the configuration file\n")); + printf(_(" -R, --remote-user=USERNAME database server username for rsync\n")); + printf(_(" -w, --wal-keep-segments=VALUE minimum value for the GUC wal_keep_segments (default: 5000)\n")); + printf(_(" -F, --force force potentially dangerous operations to happen\n")); printf(_("\n%s performs some tasks like clone a node, promote it "), progname); printf(_("or making follow another node and then exits.\n")); printf(_("COMMANDS:\n")); - printf(_(" master register - registers the master in a cluster\n")); - printf(_(" standby register - registers a standby in a cluster\n")); - printf(_(" standby clone [node] - allows creation of a new standby\n")); - printf(_(" standby promote - allows manual promotion of a specific standby into a ")); + printf(_(" master register - registers the master in a cluster\n")); + printf(_(" standby register - registers a standby in a cluster\n")); + printf(_(" standby clone [node] - allows creation of a new standby\n")); + printf(_(" standby promote - allows manual promotion of a specific standby into a ")); printf(_("new master in the event of a failover\n")); - printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); + printf(_(" standby follow - allows the standby to re-point itself to a new master\n")); } +/* + * Creates a recovery file for a standby. 
+ * + * Writes master_conninfo to recovery.conf if is non-NULL + */ static bool -create_recovery_file(const char *data_dir) +create_recovery_file(const char *data_dir, char *master_conninfo) { FILE *recovery_file; - char recovery_file_path[MAXFILENAME]; + char recovery_file_path[MAXLEN]; char line[MAXLEN]; - snprintf(recovery_file_path, MAXFILENAME, "%s/%s", data_dir, RECOVERY_FILE); + maxlen_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_FILE); recovery_file = fopen(recovery_file_path, "w"); if (recovery_file == NULL) @@ -1284,7 +1358,7 @@ create_recovery_file(const char *data_dir) return false; } - snprintf(line, MAXLEN, "standby_mode = 'on'\n"); + maxlen_snprintf(line, "standby_mode = 'on'\n"); if (fputs(line, recovery_file) == EOF) { log_err("recovery file could not be written, it could be necessary to create it manually\n"); @@ -1292,7 +1366,34 @@ create_recovery_file(const char *data_dir) return false; } - snprintf(line, MAXLEN, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host, runtime_options.masterport); + maxlen_snprintf(line, "primary_conninfo = 'host=%s port=%s'\n", runtime_options.host, runtime_options.masterport); + + /* + * Template a password into the connection string in recovery.conf. + * Sometimes this is passed by the user explicitly, and otherwise we try to + * get it into the environment + * + * XXX: This is pretty dirty, at least push this up to the caller rather + * than hitting environment variables at this level. + */ + if (master_conninfo == NULL) + { + char *password = getenv("PGPASSWORD"); + + if (password == NULL) + { + fprintf(stderr, + _("%s: Panic! 
PGPASSWORD not set, how can we get here?\n"), + progname); + exit(ERR_BAD_PASSWORD); + } + + maxlen_snprintf(line, + "primary_conninfo = 'host=%s port=%s password=%s'\n", + runtime_options.host, runtime_options.masterport, + password); + } + if (fputs(line, recovery_file) == EOF) { log_err("recovery file could not be written, it could be necessary to create it manually\n"); @@ -1308,39 +1409,42 @@ create_recovery_file(const char *data_dir) static int -copy_remote_files(char *host, char *remote_user, char *remote_path, char *local_path, bool is_directory) +copy_remote_files(char *host, char *remote_user, char *remote_path, + char *local_path, bool is_directory) { - char script[QUERY_STR_LEN]; - char rsync_flags[QUERY_STR_LEN]; - char host_string[QUERY_STR_LEN]; - int r; + char script[MAXLEN]; + char rsync_flags[MAXLEN]; + char host_string[MAXLEN]; + int r; - if (strnlen(options.rsync_options, QUERY_STR_LEN) == 0) - snprintf(rsync_flags, QUERY_STR_LEN, "--archive --checksum --compress --progress --rsh=ssh"); + if (strnlen(options.rsync_options, MAXLEN) == 0) + maxlen_snprintf( + rsync_flags, "%s", + "--archive --checksum --compress --progress --rsh=ssh"); else - strncpy(rsync_flags, options.rsync_options, QUERY_STR_LEN); + maxlen_snprintf(rsync_flags, "%s", options.rsync_options); if (runtime_options.force) strcat(rsync_flags, " --delete"); if (remote_user == NULL) { - snprintf(host_string, QUERY_STR_LEN, "%s",host); + maxlen_snprintf(host_string, "%s", host); } else { - snprintf(host_string, QUERY_STR_LEN, "%s@%s",remote_user,host); + maxlen_snprintf(host_string,"%s@%s",remote_user,host); } if (is_directory) { strcat(rsync_flags, " --exclude=pg_xlog* --exclude=pg_control --exclude=*.pid"); - snprintf(script, QUERY_STR_LEN, "rsync %s %s:%s/* %s", + maxlen_snprintf(script, "rsync %s %s:%s/* %s", rsync_flags, host_string, remote_path, local_path); } else { - snprintf(script, QUERY_STR_LEN, "rsync %s %s:%s %s/.", + maxlen_snprintf(script, "rsync %s %s:%s %s/.", 
rsync_flags, host_string, remote_path, local_path); } @@ -1450,13 +1554,26 @@ check_parameters_for_action(const int action) break; case STANDBY_CLONE: /* - * To clone a master into a standby we need connection parameters - * repmgr.conf is useless because we don't have a server running - * in the standby + * Issue a friendly notice that the configuration file is not + * necessary nor read at all in when performing a STANDBY CLONE + * action. */ if (runtime_options.config_file[0]) { - log_notice("Only command line parameters for the connection to the master are used when issuing a STANDBY CLONE command.\n"); + log_notice("Only command line parameters for the connection " + "to the master are used when issuing a STANDBY CLONE command. " + "The passed configuration file is neither required nor used\n"); + } + /* + * To clone a master into a standby we need connection parameters + * repmgr.conf is useless because we don't have a server running in + * the standby; warn the user, but keep going. 
+ */ + if (runtime_options.host == NULL) + { + log_notice("You need to use connection parameters to " + "the master when issuing a STANDBY CLONE command."); + ok = false; } need_a_node = false; break; diff --git a/repmgr.h b/repmgr.h index 6c2803e9..484ea10b 100644 --- a/repmgr.h +++ b/repmgr.h @@ -24,15 +24,12 @@ #include "getopt_long.h" #include "libpq-fe.h" +#include "strutil.h" #include "dbutils.h" #define PRIMARY_MODE 0 #define STANDBY_MODE 1 -#define MAXLEN 80 -#define MAXVERSIONSTR 16 -#define QUERY_STR_LEN 8192 - #include "config.h" #define MAXFILENAME 1024 #define MAXLINELENGTH 4096 @@ -56,6 +53,7 @@ #define ERR_DB_CON 6 #define ERR_DB_QUERY 7 #define ERR_PROMOTED 8 +#define ERR_BAD_PASSWORD 9 /* Run time options type */ typedef struct diff --git a/repmgr.sql b/repmgr.sql index f28e6d8f..75ef448f 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -1,3 +1,10 @@ +/* + * repmgr.sql + * + * Copyright (C) 2ndQuadrant, 2011 + * + */ + CREATE USER repmgr; CREATE SCHEMA repmgr; @@ -5,27 +12,25 @@ CREATE SCHEMA repmgr; * The table repl_nodes keeps information about all machines in * a cluster */ -drop table if exists repl_nodes cascade; CREATE TABLE repl_nodes ( - id integer primary key, - cluster text not null, -- Name to identify the cluster - conninfo text not null + id integer primary key, + cluster text not null, -- Name to identify the cluster + conninfo text not null ); ALTER TABLE repl_nodes OWNER TO repmgr; /* - * Keeps monitor info about every node and their relative "position" + * Keeps monitor info about every node and their relative "position" * to primary */ -drop table if exists repl_monitor cascade; CREATE TABLE repl_monitor ( primary_node INTEGER NOT NULL, standby_node INTEGER NOT NULL, - last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, - last_wal_primary_location TEXT NOT NULL, + last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL, + last_wal_primary_location TEXT NOT NULL, last_wal_standby_location TEXT NOT NULL, - replication_lag BIGINT NOT NULL, - 
apply_lag BIGINT NOT NULL + replication_lag BIGINT NOT NULL, + apply_lag BIGINT NOT NULL ); ALTER TABLE repl_monitor OWNER TO repmgr; @@ -33,21 +38,20 @@ ALTER TABLE repl_monitor OWNER TO repmgr; /* * This view shows the latest monitor info about every node. * Interesting thing to see: - * replication_lag: in bytes (this is how far the latest xlog record + * replication_lag: in bytes (this is how far the latest xlog record * we have received is from master) * apply_lag: in bytes (this is how far the latest xlog record - * we have applied is from the latest record we + * we have applied is from the latest record we * have received) * time_lag: how many seconds are we from being up-to-date with master */ -drop view if exists repl_status; CREATE VIEW repl_status AS WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node ORDER BY last_monitor_time desc) FROM repl_monitor) -SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, - last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, - pg_size_pretty(apply_lag) apply_lag, +SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, + last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, + pg_size_pretty(apply_lag) apply_lag, age(now(), last_monitor_time) AS time_lag FROM monitor_info a WHERE row_number = 1; diff --git a/repmgrd.c b/repmgrd.c index c02da090..52fb90cb 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -29,6 +29,7 @@ #include "repmgr.h" #include "config.h" #include "log.h" +#include "strutil.h" #include "libpq/pqsignal.h" @@ -40,6 +41,7 @@ PGconn *myLocalConn = NULL; /* Primary info */ t_configuration_options primary_options; + PGconn *primaryConn = NULL; char sqlquery[QUERY_STR_LEN]; @@ -52,7 +54,8 @@ char repmgr_schema[MAXLEN]; /* * should initialize with {0} to be ANSI complaint ? 
but this raises - * error with gcc -Wall */ + * error with gcc -Wall + */ t_configuration_options config = {}; static void help(const char* progname); @@ -69,22 +72,22 @@ static void handle_sigint(SIGNAL_ARGS); static void setup_cancel_handler(void); #define CloseConnections() \ - if (PQisBusy(primaryConn) == 1) \ - CancelQuery(); \ - if (myLocalConn != NULL) \ - PQfinish(myLocalConn); \ - if (primaryConn != NULL && primaryConn != myLocalConn) \ - PQfinish(primaryConn); + if (PQisBusy(primaryConn) == 1) \ + CancelQuery(); \ + if (myLocalConn != NULL) \ + PQfinish(myLocalConn); \ + if (primaryConn != NULL && primaryConn != myLocalConn) \ + PQfinish(primaryConn); /* * Every 3 seconds, insert monitor info */ -#define MonitorCheck() \ - for (;;) \ - { \ - MonitorExecute(); \ - sleep(3); \ - } +#define MonitorCheck() \ + for (;;) \ + { \ + MonitorExecute(); \ + sleep(3); \ + } int @@ -175,7 +178,8 @@ main(int argc, char **argv) else { /* I need the id of the primary as well as a connection to it */ - primaryConn = getMasterConnection(myLocalConn, local_options.node, local_options.cluster_name, &primary_options.node); + primaryConn = getMasterConnection(myLocalConn, local_options.node, + local_options.cluster_name, &primary_options.node,NULL); if (primaryConn == NULL) exit(ERR_BAD_CONFIG); } @@ -187,6 +191,10 @@ main(int argc, char **argv) MonitorCheck(); } + /* Prevent a double-free */ + if (primaryConn == myLocalConn) + myLocalConn = NULL; + /* close the connection to the database and cleanup */ CloseConnections(); @@ -245,7 +253,8 @@ MonitorExecute(void) log_err(_("We couldn't reconnect to master. 
Now checking if another node has been promoted.")); for (connection_retries = 0; connection_retries < 6; connection_retries++) { - primaryConn = getMasterConnection(myLocalConn, local_options.node, local_options.cluster_name, &primary_options.node); + primaryConn = getMasterConnection(myLocalConn, local_options.node, + local_options.cluster_name, &primary_options.node,NULL); if (PQstatus(primaryConn) == CONNECTION_OK) { /* Connected, we can continue the process so break the loop */ @@ -283,9 +292,10 @@ MonitorExecute(void) CancelQuery(); /* Get local xlog info */ - snprintf(sqlquery, QUERY_STR_LEN, - "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " - "pg_last_xlog_replay_location()"); + sqlquery_snprintf( + sqlquery, + "SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), " + "pg_last_xlog_replay_location()"); res = PQexec(myLocalConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -302,7 +312,7 @@ MonitorExecute(void) PQclear(res); /* Get primary xlog info */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT pg_current_xlog_location() "); + sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location() "); res = PQexec(primaryConn, sqlquery); if (PQresultStatus(res) != PGRES_TUPLES_OK) @@ -323,8 +333,8 @@ MonitorExecute(void) /* * Build the SQL to execute on primary */ - snprintf(sqlquery, - QUERY_STR_LEN, "INSERT INTO %s.repl_monitor " + sqlquery_snprintf(sqlquery, + "INSERT INTO %s.repl_monitor " "VALUES(%d, %d, '%s'::timestamp with time zone, " " '%s', '%s', " " %lld, %lld)", repmgr_schema, @@ -349,7 +359,7 @@ checkClusterConfiguration(void) { PGresult *res; - snprintf(sqlquery, QUERY_STR_LEN, "SELECT oid FROM pg_class " + sqlquery_snprintf(sqlquery, "SELECT oid FROM pg_class " " WHERE oid = '%s.repl_nodes'::regclass", repmgr_schema); res = PQexec(myLocalConn, sqlquery); @@ -363,8 +373,10 @@ checkClusterConfiguration(void) } /* - * If there isn't any results then we have not configured a primary node yet - * in repmgr or the connection 
string is pointing to the wrong database. + * If there isn't any results then we have not configured a primary node + * yet in repmgr or the connection string is pointing to the wrong + * database. + * * XXX if we are the primary, should we try to create the tables needed? */ if (PQntuples(res) == 0) @@ -387,7 +399,7 @@ checkNodeConfiguration(char *conninfo) /* * Check if we have my node information in repl_nodes */ - snprintf(sqlquery, QUERY_STR_LEN, "SELECT * FROM %s.repl_nodes " + sqlquery_snprintf(sqlquery, "SELECT * FROM %s.repl_nodes " " WHERE id = %d AND cluster = '%s' ", repmgr_schema, local_options.node, local_options.cluster_name); @@ -408,8 +420,9 @@ checkNodeConfiguration(char *conninfo) if (PQntuples(res) == 0) { PQclear(res); + /* Adding the node */ - snprintf(sqlquery, QUERY_STR_LEN, "INSERT INTO %s.repl_nodes " + sqlquery_snprintf(sqlquery, "INSERT INTO %s.repl_nodes " "VALUES (%d, '%s', '%s')", repmgr_schema, local_options.node, local_options.cluster_name, local_options.conninfo); diff --git a/strutil.c b/strutil.c new file mode 100644 index 00000000..0969597d --- /dev/null +++ b/strutil.c @@ -0,0 +1,72 @@ +/* + * strutil.c + * + * Copyright (C) 2ndQuadrant, 2011 + * + */ + +#include +#include +#include + +#include "strutil.h" + +static int xvsnprintf(char *str, size_t size, const char *format, va_list ap); + + +static int +xvsnprintf(char *str, size_t size, const char *format, va_list ap) +{ + int retval; + + retval = vsnprintf(str, size, format, ap); + + if (retval >= size) + { + fprintf(stderr, "Buffer not large enough to format entire string\n"); + exit(255); + } + + return retval; +} + + +int +xsnprintf(char *str, size_t size, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, size, format, arglist); + va_end(arglist); + + return retval; +} + + +int +sqlquery_snprintf(char *str, const char *format, ...) 
+{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, QUERY_STR_LEN, format, arglist); + va_end(arglist); + + return retval; +} + + +int maxlen_snprintf(char *str, const char *format, ...) +{ + va_list arglist; + int retval; + + va_start(arglist, format); + retval = xvsnprintf(str, MAXLEN, format, arglist); + va_end(arglist); + + return retval; +} diff --git a/strutil.h b/strutil.h new file mode 100644 index 00000000..959448fe --- /dev/null +++ b/strutil.h @@ -0,0 +1,25 @@ +/* + * strutil.h + * + * Copyright (C) 2ndQuadrant, 2010-2011 + * + */ + +#ifndef _STRUTIL_H_ +#define _STRUTIL_H_ + +#include + + +#define QUERY_STR_LEN 8192 +#define MAXLEN 1024 +#define MAXLINELENGTH 4096 +#define MAXVERSIONSTR 16 +#define MAXCONNINFO 1024 + + +extern int xsnprintf(char *str, size_t size, const char *format, ...); +extern int sqlquery_snprintf(char *str, const char *format, ...); +extern int maxlen_snprintf(char *str, const char *format, ...); + +#endif /* _STRUTIL_H_ */ diff --git a/uninstall_repmgr.sql b/uninstall_repmgr.sql new file mode 100644 index 00000000..498a9585 --- /dev/null +++ b/uninstall_repmgr.sql @@ -0,0 +1,13 @@ +/* + * uninstall_repmgr.sql + * + * Copyright (C) 2ndQuadrant, 2010-2011 + * + */ + +DROP TABLE IF EXISTS repl_nodes; +DROP TABLE IF EXISTS repl_monitor; +DROP VIEW IF EXISTS repl_status; + +DROP SCHEMA repmgr; +DROP USER repmgr;