Compare commits

..

9 Commits

Author SHA1 Message Date
Ian Barwick
5ad674edff Bump version
3.1.1
2016-02-23 15:56:24 +09:00
Ian Barwick
ac09bad89c Minor fixes to README.md 2016-02-23 14:37:59 +09:00
Ian Barwick
009d92fec8 Ensure witness node is registered before the repl_nodes table is copied
This fixes a bug introduced into the previous commit, where the
witness node was registered last to prevent a spurious node record
being created even if witness server creation failed.
2016-02-23 14:37:54 +09:00
Martin
b3d8a68a1d Fix a few paragraphs from the README.md. 2016-02-23 14:37:48 +09:00
Ian Barwick
05b47cb2a8 Prevent repmgr/repmgrd running as root 2016-02-23 14:37:44 +09:00
Ian Barwick
dc542a1b7d Better handling of errors during witness creation
Ensure witness is only registered after all steps for creation
have been successfully completed.

Also write an event record if connection could not be made to
the witness server after initial creation.

This addresses GitHub issue #146.
2016-02-23 14:37:39 +09:00
Ian Barwick
6ce8058749 witness creation: extract database and user names from the local conninfo string
99.9% of the time they'll be the same as the primary connection, but
it's more consistent to use the provided local conninfo string
(from which the port is already extracted).
2016-02-23 14:37:31 +09:00
Ian Barwick
2edcac77f0 README.md: update witness server section 2016-02-23 14:37:27 +09:00
Ian Barwick
f740374392 Add '-P/--pwprompt' option for "repmgr create witness"
Optionally prompt for superuser and repmgr user when creating a witness.
This ensures a password can be provided if the primary's pg_hba.conf
mandates it.

This deprecates '--initdb-no-pwprompt'; and changes the default behaviour of
"repmgr create witness", which previously required a superuser password
unless '--initdb-no-pwprompt' was supplied.

This behaviour is more consistent with other PostgreSQL utilities such
as createuser.

Partial fix for GitHub issue #145.
2016-02-23 14:37:23 +09:00
16 changed files with 180 additions and 624 deletions

11
HISTORY
View File

@@ -1,13 +1,4 @@
3.1.2 2016-04-12
Fix pg_ctl path generation in do_standby_switchover() (Ian)
Regularly sync witness server repl_nodes table (Ian)
Documentation improvements (Gianni, dhyannataraj)
(Experimental) ensure repmgr handles failover slots when copying
in rsync mode (Craig, Ian)
rsync mode handling fixes (Martín)
Enable repmgr to compile against 9.6devel (Ian)
3.1.1 2016-02-24
3.1.1 2016-02-
Add '-P/--pwprompt' option for "repmgr create witness" (Ian)
Prevent repmgr/repmgrd running as root (Ian)

View File

@@ -2,32 +2,23 @@
# Makefile
# Copyright (c) 2ndQuadrant, 2010-2016
HEADERS = $(wildcard *.h)
repmgrd_OBJS = dbutils.o config.o repmgrd.o log.o strutil.o
repmgr_OBJS = dbutils.o check_dir.o config.o repmgr.o log.o strutil.o
DATA = repmgr.sql uninstall_repmgr.sql
PG_CPPFLAGS = -I$(libpq_srcdir)
PG_LIBS = $(libpq_pgport)
PG_LIBS = $(libpq_pgport)
all: repmgrd repmgr
all: repmgrd repmgr
$(MAKE) -C sql
repmgrd: $(repmgrd_OBJS)
$(CC) -o repmgrd $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
$(CC) $(CFLAGS) $(repmgrd_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgrd
$(MAKE) -C sql
repmgr: $(repmgr_OBJS)
$(CC) -o repmgr $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS)
# Make all objects depend on all include files. This is a bit of a
# shotgun approach, but the codebase is small enough that a complete rebuild
# is very fast anyway.
$(repmgr_OBJS): $(HEADERS)
$(repmgrd_OBJS): $(HEADERS)
$(CC) $(CFLAGS) $(repmgr_OBJS) $(PG_LIBS) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o repmgr
ifdef USE_PGXS
PG_CONFIG = pg_config
@@ -40,8 +31,8 @@ include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
# XXX: This overrides the pgxs install target - we're building two binaries,
# which is not supported by pgxs.mk's PROGRAM construct.
# XXX: Try to use PROGRAM construct (see pgxs.mk) someday. Right now
# is overriding pgxs install.
install: install_prog install_ext
install_prog:
@@ -52,12 +43,6 @@ install_prog:
install_ext:
$(MAKE) -C sql install
# Distribution-specific package building targets
# ----------------------------------------------
#
# XXX we recommend using the PGDG-supplied packages where possible;
# see README.md for details.
install_rhel:
mkdir -p '$(DESTDIR)/etc/init.d/'
$(INSTALL_PROGRAM) RHEL/repmgrd.init '$(DESTDIR)/etc/init.d/repmgrd'

View File

@@ -259,20 +259,6 @@ The following replication settings must be included in `postgresql.conf`:
hot_standby = on
# If archive_mode is enabled, check that 'archive_command' is non empty
# (however it's not practical to check that it actually represents a valid
# command).
#
# From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always'
# so for ease of backwards compatibility, rather than explicitly check for an
# enabled mode, check that it's not "off".
archive_mode = on
# Set archive command to a script or application that will safetly store
# you WALs in a secure place. /bin/true is an example of a command that
# ignores archiving. Use something more sensible.
archive_command = '/bin/true'
* * *
@@ -298,11 +284,11 @@ similar to the following:
local replication repmgr trust
host replication repmgr 127.0.0.1/32 trust
host replication repmgr 192.168.1.0/24 trust
host replication repmgr 192.168.1.0/32 trust
local repmgr repmgr trust
host repmgr repmgr 127.0.0.1/32 trust
host repmgr repmgr 192.168.1.0/24 trust
host repmgr repmgr 192.168.1.0/32 trust
Adjust according to your network environment and authentication requirements.
@@ -890,10 +876,6 @@ be set in `repmgr.conf`:
(See `repmgr.conf.sample` for further `repmgrd`-specific settings).
Additionally, `postgresql.conf` must contain the following line:
shared_preload_libraries = 'repmgr_funcs'
When `failover` is set to `automatic`, upon detecting failure of the current
master, `repmgrd` will execute one of `promote_command` or `follow_command`,
depending on whether the current server is becoming the new master or

View File

@@ -235,9 +235,6 @@ parse_config(t_configuration_options *options)
options->monitor_interval_secs = 2;
options->retry_promote_interval_secs = 300;
/* default to resyncing repl_nodes table every 30 seconds on the witness server */
options->witness_repl_nodes_sync_interval_secs = 30;
memset(options->event_notification_command, 0, sizeof(options->event_notification_command));
options->tablespace_mapping.head = NULL;
@@ -361,8 +358,6 @@ parse_config(t_configuration_options *options)
options->monitor_interval_secs = repmgr_atoi(value, "monitor_interval_secs", &config_errors, false);
else if (strcmp(name, "retry_promote_interval_secs") == 0)
options->retry_promote_interval_secs = repmgr_atoi(value, "retry_promote_interval_secs", &config_errors, false);
else if (strcmp(name, "witness_repl_nodes_sync_interval_secs") == 0)
options->witness_repl_nodes_sync_interval_secs = repmgr_atoi(value, "witness_repl_nodes_sync_interval_secs", &config_errors, false);
else if (strcmp(name, "use_replication_slots") == 0)
/* XXX we should have a dedicated boolean argument format */
options->use_replication_slots = repmgr_atoi(value, "use_replication_slots", &config_errors, false);

View File

@@ -75,14 +75,13 @@ typedef struct
char logfile[MAXLEN];
int monitor_interval_secs;
int retry_promote_interval_secs;
int witness_repl_nodes_sync_interval_secs;
int use_replication_slots;
char event_notification_command[MAXLEN];
EventNotificationList event_notifications;
TablespaceList tablespace_mapping;
} t_configuration_options;
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
#define T_CONFIGURATION_OPTIONS_INITIALIZER { "", -1, NO_UPSTREAM_NODE, "", MANUAL_FAILOVER, -1, "", "", "", "", "", "", "", -1, -1, -1, "", "", "", "", 0, 0, 0, "", { NULL, NULL }, {NULL, NULL} }
typedef struct ErrorListCell
{

View File

@@ -889,7 +889,7 @@ get_repmgr_schema_quoted(PGconn *conn)
bool
create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
create_replication_slot(PGconn *conn, char *slot_name)
{
char sqlquery[QUERY_STR_LEN];
int query_res;
@@ -926,19 +926,9 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num)
return false;
}
/* In 9.6 and later, reserve the LSN straight away */
if (server_version_num >= 90600)
{
sqlquery_snprintf(sqlquery,
"SELECT * FROM pg_create_physical_replication_slot('%s', TRUE)",
slot_name);
}
else
{
sqlquery_snprintf(sqlquery,
"SELECT * FROM pg_create_physical_replication_slot('%s')",
slot_name);
}
sqlquery_snprintf(sqlquery,
"SELECT * FROM pg_create_physical_replication_slot('%s')",
slot_name);
log_debug(_("create_replication_slot(): Creating slot '%s' on primary\n"), slot_name);
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s\n", sqlquery);
@@ -1121,7 +1111,7 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
/*
* witness_copy_node_records()
* copy_configuration()
*
* Copy records in master's `repl_nodes` table to witness database
*
@@ -1129,49 +1119,29 @@ set_config_bool(PGconn *conn, const char *config_param, bool state)
* `repmgrd` after a failover event occurs
*/
bool
witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name)
{
char sqlquery[MAXLEN];
PGresult *res;
int i;
begin_transaction(witnessconn);
/* Defer constraints */
sqlquery_snprintf(sqlquery, "SET CONSTRAINTS ALL DEFERRED;");
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
res = PQexec(witnessconn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{
log_err(_("Unable to defer constraints:\n%s\n"),
PQerrorMessage(witnessconn));
rollback_transaction(witnessconn);
return false;
}
/* Truncate existing records */
sqlquery_snprintf(sqlquery, "TRUNCATE TABLE %s.repl_nodes", get_repmgr_schema_quoted(witnessconn));
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
res = PQexec(witnessconn, sqlquery);
if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
{
log_err(_("Unable to truncate witness servers's repl_nodes table:\n%s\n"),
PQerrorMessage(witnessconn));
rollback_transaction(witnessconn);
return false;
}
/* Get current records from primary */
sqlquery_snprintf(sqlquery,
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name, active FROM %s.repl_nodes",
"SELECT id, type, upstream_node_id, name, conninfo, priority, slot_name FROM %s.repl_nodes",
get_repmgr_schema_quoted(masterconn));
log_verbose(LOG_DEBUG, "witness_copy_node_records():\n%s\n", sqlquery);
log_verbose(LOG_DEBUG, "copy_configuration():\n%s\n", sqlquery);
res = PQexec(masterconn, sqlquery);
if (PQresultStatus(res) != PGRES_TUPLES_OK)
@@ -1179,23 +1149,20 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
log_err("Unable to retrieve node records from master:\n%s\n",
PQerrorMessage(masterconn));
PQclear(res);
rollback_transaction(witnessconn);
return false;
}
/* Insert primary records into witness table */
for (i = 0; i < PQntuples(res); i++)
{
bool node_record_created;
log_verbose(LOG_DEBUG,
"witness_copy_node_records(): writing node record for node %s (id: %s)\n",
PQgetvalue(res, i, 3),
"copy_configuration(): writing node record for node %s (id: %s)\n",
PQgetvalue(res, i, 4),
PQgetvalue(res, i, 0));
node_record_created = create_node_record(witnessconn,
"witness_copy_node_records",
"copy_configuration",
atoi(PQgetvalue(res, i, 0)),
PQgetvalue(res, i, 1),
strlen(PQgetvalue(res, i, 2))
@@ -1207,10 +1174,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
atoi(PQgetvalue(res, i, 5)),
strlen(PQgetvalue(res, i, 6))
? PQgetvalue(res, i, 6)
: NULL,
(strcmp(PQgetvalue(res, i, 7), "t") == 0)
? true
: false
: NULL
);
if (node_record_created == false)
@@ -1219,16 +1183,11 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
log_err("Unable to copy node record to witness database\n%s\n",
PQerrorMessage(witnessconn));
rollback_transaction(witnessconn);
return false;
}
}
PQclear(res);
/* And finished */
commit_transaction(witnessconn);
return true;
}
@@ -1241,7 +1200,7 @@ witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster
* XXX we should pass the record parameters as a struct.
*/
bool
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name)
{
char sqlquery[QUERY_STR_LEN];
char upstream_node_id[MAXLEN];
@@ -1282,8 +1241,8 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
sqlquery_snprintf(sqlquery,
"INSERT INTO %s.repl_nodes "
" (id, type, upstream_node_id, cluster, "
" name, conninfo, slot_name, priority, active) "
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i, %s) ",
" name, conninfo, slot_name, priority) "
"VALUES (%i, '%s', %s, '%s', '%s', '%s', %s, %i) ",
get_repmgr_schema_quoted(conn),
node,
type,
@@ -1292,8 +1251,7 @@ create_node_record(PGconn *conn, char *action, int node, char *type, int upstrea
node_name,
conninfo,
slot_name_buf,
priority,
active == true ? "TRUE" : "FALSE");
priority);
log_verbose(LOG_DEBUG, "create_node_record(): %s\n", sqlquery);
@@ -1333,7 +1291,7 @@ delete_node_record(PGconn *conn, int node, char *action)
if (action != NULL)
{
log_verbose(LOG_DEBUG, "delete_node_record(): action is \"%s\"\n", action);
log_verbose(LOG_DEBUG, "create_node_record(): action is \"%s\"\n", action);
}
res = PQexec(conn, sqlquery);

View File

@@ -115,14 +115,14 @@ int wait_connection_availability(PGconn *conn, long long timeout);
bool cancel_query(PGconn *conn, int timeout);
char *get_repmgr_schema(void);
char *get_repmgr_schema_quoted(PGconn *conn);
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num);
bool create_replication_slot(PGconn *conn, char *slot_name);
int get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
bool drop_replication_slot(PGconn *conn, char *slot_name);
bool start_backup(PGconn *conn, char *first_wal_segment, bool fast_checkpoint);
bool stop_backup(PGconn *conn, char *last_wal_segment);
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
bool witness_copy_node_records(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active);
bool copy_configuration(PGconn *masterconn, PGconn *witnessconn, char *cluster_name);
bool create_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name);
bool delete_node_record(PGconn *conn, int node, char *action);
int get_node_record(PGconn *conn, char *cluster, int node_id, t_node_info *node_info);
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
@@ -133,4 +133,3 @@ int get_node_replication_state(PGconn *conn, char *node_name, char *output)
t_server_type parse_node_type(const char *type);
int get_data_checksum_version(const char *data_directory);
#endif

View File

@@ -37,6 +37,5 @@
#define ERR_BAD_BASEBACKUP 14
#define ERR_INTERNAL 15
#define ERR_MONITORING_FAIL 16
#define ERR_BAD_BACKUP_LABEL 17
#endif /* _ERRCODE_H_ */

398
repmgr.c
View File

@@ -43,6 +43,7 @@
#include "repmgr.h"
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
@@ -121,8 +122,6 @@ static bool remote_command(const char *host, const char *user, const char *comma
static void format_db_cli_params(const char *conninfo, char *output);
static bool copy_file(const char *old_filename, const char *new_filename);
static void read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label);
/* Global variables */
static const char *keywords[6];
static const char *values[6];
@@ -148,7 +147,6 @@ static char path_buf[MAXLEN] = "";
ErrorList cli_errors = { NULL, NULL };
ErrorList cli_warnings = { NULL, NULL };
static struct BackupLabel backup_label;
int
main(int argc, char **argv)
@@ -161,8 +159,6 @@ main(int argc, char **argv)
{"username", required_argument, NULL, 'U'},
{"superuser", required_argument, NULL, 'S'},
{"data-dir", required_argument, NULL, 'D'},
/* alias for -D/--data-dir, following pg_ctl usage */
{"pgdata", required_argument, NULL, 'D'},
/* -l/--local-port is deprecated */
{"local-port", required_argument, NULL, 'l'},
{"config-file", required_argument, NULL, 'f'},
@@ -179,7 +175,7 @@ main(int argc, char **argv)
{"terse", required_argument, NULL, 't'},
{"mode", required_argument, NULL, 'm'},
{"remote-config-file", required_argument, NULL, 'C'},
/* deprecated from 3.2; replaced with -P/--pwprompt */
/* deprecated from 3.2; replaced with -P/----pwprompt */
{"initdb-no-pwprompt", no_argument, NULL, 1},
{"check-upstream-config", no_argument, NULL, 2},
{"recovery-min-apply-delay", required_argument, NULL, 3},
@@ -296,7 +292,7 @@ main(int argc, char **argv)
strncpy(runtime_options.superuser, optarg, MAXLEN);
break;
case 'D':
strncpy(runtime_options.dest_dir, optarg, MAXPGPATH);
strncpy(runtime_options.dest_dir, optarg, MAXFILENAME);
break;
case 'l':
/* -l/--local-port is deprecated */
@@ -420,7 +416,7 @@ main(int argc, char **argv)
case 6:
if (optarg != NULL)
{
strncpy(runtime_options.pg_rewind, optarg, MAXPGPATH);
strncpy(runtime_options.pg_rewind, optarg, MAXFILENAME);
}
pg_rewind_supplied = true;
break;
@@ -1075,8 +1071,7 @@ do_master_register(void)
options.node_name,
options.conninfo,
options.priority,
repmgr_slot_name_ptr,
true);
repmgr_slot_name_ptr);
if (record_created == false)
{
@@ -1177,8 +1172,9 @@ do_standby_register(void)
options.node_name,
options.conninfo,
options.priority,
repmgr_slot_name_ptr,
true);
repmgr_slot_name_ptr);
if (record_created == false)
{
@@ -1311,30 +1307,29 @@ do_standby_clone(void)
bool target_directory_provided = false;
bool external_config_file_copy_required = false;
char master_data_directory[MAXPGPATH];
char local_data_directory[MAXPGPATH];
char master_data_directory[MAXFILENAME];
char local_data_directory[MAXFILENAME];
char master_config_file[MAXPGPATH] = "";
char local_config_file[MAXPGPATH] = "";
char master_config_file[MAXFILENAME] = "";
char local_config_file[MAXFILENAME] = "";
bool config_file_outside_pgdata = false;
char master_hba_file[MAXPGPATH] = "";
char local_hba_file[MAXPGPATH] = "";
char master_hba_file[MAXFILENAME] = "";
char local_hba_file[MAXFILENAME] = "";
bool hba_file_outside_pgdata = false;
char master_ident_file[MAXPGPATH] = "";
char local_ident_file[MAXPGPATH] = "";
char master_ident_file[MAXFILENAME] = "";
char local_ident_file[MAXFILENAME] = "";
bool ident_file_outside_pgdata = false;
char master_control_file[MAXPGPATH] = "";
char local_control_file[MAXPGPATH] = "";
char master_control_file[MAXFILENAME] = "";
char local_control_file[MAXFILENAME] = "";
char *first_wal_segment = NULL;
char *last_wal_segment = NULL;
PQExpBufferData event_details;
/*
* If dest_dir (-D/--pgdata) was provided, this will become the new data
* directory (otherwise repmgr will default to the same directory as on the
@@ -1496,7 +1491,7 @@ do_standby_clone(void)
{
if (strcmp(PQgetvalue(res, i, 0), "data_directory") == 0)
{
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXPGPATH);
strncpy(master_data_directory, PQgetvalue(res, i, 1), MAXFILENAME);
}
else if (strcmp(PQgetvalue(res, i, 0), "config_file") == 0)
{
@@ -1504,7 +1499,7 @@ do_standby_clone(void)
{
config_file_outside_pgdata = true;
external_config_file_copy_required = true;
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXPGPATH);
strncpy(master_config_file, PQgetvalue(res, i, 1), MAXFILENAME);
}
}
else if (strcmp(PQgetvalue(res, i, 0), "hba_file") == 0)
@@ -1513,7 +1508,7 @@ do_standby_clone(void)
{
hba_file_outside_pgdata = true;
external_config_file_copy_required = true;
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXPGPATH);
strncpy(master_hba_file, PQgetvalue(res, i, 1), MAXFILENAME);
}
}
else if (strcmp(PQgetvalue(res, i, 0), "ident_file") == 0)
@@ -1522,7 +1517,7 @@ do_standby_clone(void)
{
ident_file_outside_pgdata = true;
external_config_file_copy_required = true;
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXPGPATH);
strncpy(master_ident_file, PQgetvalue(res, i, 1), MAXFILENAME);
}
}
else
@@ -1538,20 +1533,20 @@ do_standby_clone(void)
*/
if (target_directory_provided)
{
strncpy(local_data_directory, runtime_options.dest_dir, MAXPGPATH);
strncpy(local_config_file, runtime_options.dest_dir, MAXPGPATH);
strncpy(local_hba_file, runtime_options.dest_dir, MAXPGPATH);
strncpy(local_ident_file, runtime_options.dest_dir, MAXPGPATH);
strncpy(local_data_directory, runtime_options.dest_dir, MAXFILENAME);
strncpy(local_config_file, runtime_options.dest_dir, MAXFILENAME);
strncpy(local_hba_file, runtime_options.dest_dir, MAXFILENAME);
strncpy(local_ident_file, runtime_options.dest_dir, MAXFILENAME);
}
/*
* Otherwise use the same data directory as on the remote host
*/
else
{
strncpy(local_data_directory, master_data_directory, MAXPGPATH);
strncpy(local_config_file, master_config_file, MAXPGPATH);
strncpy(local_hba_file, master_hba_file, MAXPGPATH);
strncpy(local_ident_file, master_ident_file, MAXPGPATH);
strncpy(local_data_directory, master_data_directory, MAXFILENAME);
strncpy(local_config_file, master_config_file, MAXFILENAME);
strncpy(local_hba_file, master_hba_file, MAXFILENAME);
strncpy(local_ident_file, master_ident_file, MAXFILENAME);
log_notice(_("setting data directory to: %s\n"), local_data_directory);
log_hint(_("use -D/--data-dir to explicitly specify a data directory\n"));
@@ -1591,7 +1586,7 @@ do_standby_clone(void)
*/
if (options.use_replication_slots)
{
if (create_replication_slot(upstream_conn, repmgr_slot_name, server_version_num) == false)
if (create_replication_slot(upstream_conn, repmgr_slot_name) == false)
{
PQfinish(upstream_conn);
exit(ERR_DB_QUERY);
@@ -1659,25 +1654,13 @@ do_standby_clone(void)
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
master_data_directory, local_data_directory,
true, server_version_num);
/*
Exit code 0 means no error, but we want to ignore exit code 24 as well
as rsync returns that code on "Partial transfer due to vanished source files".
It's quite common for this to happen on the data directory, particularly
with long running rsync on a busy server.
*/
if (r != 0 && r != 24)
if (r != 0)
{
log_warning(_("standby clone: failed copying master data directory '%s'\n"),
master_data_directory);
goto stop_backup;
}
/* Read backup label copied from primary */
/* XXX ensure this function does not exit on error as we'd need to stop the backup */
read_backup_label(local_data_directory, &backup_label);
printf("Label: %s; file: %s\n", backup_label.label, backup_label.start_wal_file);
/* Handle tablespaces */
sqlquery_snprintf(sqlquery,
@@ -1745,18 +1728,6 @@ do_standby_clone(void)
tblspc_dir_src.data, tblspc_dir_dst.data,
true, server_version_num);
/*
Exit code 0 means no error, but we want to ignore exit code 24 as well
as rsync returns that code on "Partial transfer due to vanished source files".
It's quite common for this to happen on the data directory, particularly
with long running rsync on a busy server.
*/
if (r != 0 && r != 24)
{
log_warning(_("standby clone: failed copying tablespace directory '%s'\n"),
tblspc_dir_src.data);
goto stop_backup;
}
/* Update symlinks in pg_tblspc */
if (mapping_found == true)
@@ -1981,55 +1952,44 @@ stop_backup:
exit(retval);
}
/*
* Clean up any $PGDATA subdirectories which may contain
* files which won't be removed by rsync and which could
* be stale or are otherwise not required
*/
if (runtime_options.rsync_only)
if (runtime_options.rsync_only && runtime_options.force)
{
char script[MAXLEN];
char label_path[MAXPGPATH];
if (runtime_options.force)
{
/*
* Remove any existing WAL from the target directory, since
* rsync's --exclude option doesn't do it.
*/
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
local_data_directory);
r = system(script);
if (r != 0)
{
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
/*
* Remove any existing WAL from the target directory, since
* rsync's --exclude option doesn't do it.
*/
maxlen_snprintf(script, "rm -rf %s/pg_xlog/*",
local_data_directory);
exit(ERR_BAD_RSYNC);
}
r = system(script);
if (r != 0)
{
log_err(_("unable to empty local WAL directory %s/pg_xlog/\n"),
local_data_directory);
exit(ERR_BAD_RSYNC);
}
/*
* Remove any existing replication slot directories from previous use
* of this data directory; this matches the behaviour of a fresh
* pg_basebackup, which would usually result in an empty pg_replslot
* directory.
*
* If the backup label contains a nonzero
* 'MIN FAILOVER SLOT LSN' entry we retain the slots and let
* the server clean them up instead, matching pg_basebackup's
* behaviour when failover slots are enabled.
* Remove any replication slot directories; this matches the
* behaviour a base backup, which would result in an empty
* pg_replslot directory.
*
* NOTE: watch out for any changes in the replication
* slot directory name (as of 9.4: "pg_replslot") and
* functionality of replication slots
*/
if (server_version_num >= 90400 &&
backup_label.min_failover_slot_lsn == InvalidXLogRecPtr)
if (server_version_num >= 90400)
{
maxlen_snprintf(script, "rm -rf %s/pg_replslot/*",
local_data_directory);
log_debug("deleting pg_replslot directory contents\n");
r = system(script);
if (r != 0)
{
@@ -2038,13 +1998,6 @@ stop_backup:
exit(ERR_BAD_RSYNC);
}
}
/* delete the backup label file copied from the primary */
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
if (0 && unlink(label_path) < 0 && errno != ENOENT)
{
log_warning(_("unable to delete backup label file %s\n"), label_path);
}
}
/* Finally, write the recovery.conf file */
@@ -2060,9 +2013,9 @@ stop_backup:
}
/*
* XXX It might be nice to provide an options to have repmgr start
* the PostgreSQL server automatically (e.g. with a custom pg_ctl
* command)
* XXX It might be nice to provide the following options:
* - have repmgr start the daemon automatically
* - provide a custom pg_ctl command
*/
log_notice(_("you can now start your PostgreSQL server\n"));
@@ -2076,28 +2029,7 @@ stop_backup:
log_hint(_("for example : /etc/init.d/postgresql start\n"));
}
/*
* XXX forgetting to (re) register the standby is a frequent cause
* of error; we should consider having repmgr automatically
* register the standby, either by default with an option
* "--no-register", or an option "--register".
*
* Note that "repmgr standby register" requires the standby to
* be running - if not, and we just update the node record,
* we'd have an incorrect representation of the replication cluster.
* Best combined with an automatic start of the server (see note
* above)
*/
/*
* XXX detect whether a record exists for this node already, and
* add a hint about using the -F/--force.
*/
log_hint(_("After starting the server, you need to register this standby with \"repmgr standby register\"\n"));
/* Log the event - if we can connect to the primary */
/* Log the event - if we could connect to the primary */
if (primary_conn != NULL)
{
@@ -2129,159 +2061,6 @@ stop_backup:
exit(retval);
}
static bool
parse_lsn(XLogRecPtr *ptr, const char *str)
{
uint32 high, low;
if (sscanf(str, "%x/%x", &high, &low) != 2)
return false;
*ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low;
return true;
}
static XLogRecPtr
parse_label_lsn(const char *label_key, const char *label_value)
{
XLogRecPtr ptr;
if (!parse_lsn(&ptr, label_value))
{
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
label_key, label_value);
exit(ERR_BAD_BACKUP_LABEL);
}
return ptr;
}
/*======================================
* Read entries of interest from the backup label.
*
* Sample backup label (with failover slots):
*
* START WAL LOCATION: 0/6000028 (file 000000010000000000000006)
* CHECKPOINT LOCATION: 0/6000060
* BACKUP METHOD: streamed
* BACKUP FROM: master
* START TIME: 2016-03-30 12:18:12 AWST
* LABEL: pg_basebackup base backup
* MIN FAILOVER SLOT LSN: 0/5000000
*
*======================================
*/
static void
read_backup_label(const char *local_data_directory, struct BackupLabel *out_backup_label)
{
char label_path[MAXPGPATH];
FILE *label_file;
int nmatches = 0;
char line[MAXLEN];
out_backup_label->start_wal_location = InvalidXLogRecPtr;
out_backup_label->start_wal_file[0] = '\0';
out_backup_label->checkpoint_location = InvalidXLogRecPtr;
out_backup_label->backup_from[0] = '\0';
out_backup_label->backup_method[0] = '\0';
out_backup_label->start_time[0] = '\0';
out_backup_label->label[0] = '\0';
out_backup_label->min_failover_slot_lsn = InvalidXLogRecPtr;
maxlen_snprintf(label_path, "%s/backup_label", local_data_directory);
label_file = fopen(label_path, "r");
if (label_file == NULL)
{
log_err(_("read_backup_label: could not open backup label file %s: %s"),
label_path, strerror(errno));
exit(ERR_BAD_BACKUP_LABEL);
}
log_info(_("read_backup_label: parsing backup label file '%s'\n"),
label_path);
while(fgets(line, sizeof line, label_file) != NULL)
{
char label_key[MAXLEN];
char label_value[MAXLEN];
char newline;
nmatches = sscanf(line, "%" MAXLEN_STR "[^:]: %" MAXLEN_STR "[^\n]%c",
label_key, label_value, &newline);
if (nmatches != 3)
break;
if (newline != '\n')
{
log_err(_("read_backup_label: line too long in backup label file. Line begins \"%s: %s\""),
label_key, label_value);
exit(ERR_BAD_BACKUP_LABEL);
}
log_debug("standby clone: got backup label entry \"%s: %s\"\n",
label_key, label_value);
if (strcmp(label_key, "START WAL LOCATION") == 0)
{
char start_wal_location[MAXLEN];
char wal_filename[MAXLEN];
nmatches = sscanf(label_value, "%" MAXLEN_STR "s (file %" MAXLEN_STR "[^)]", start_wal_location, wal_filename);
if (nmatches != 2)
{
log_err(_("read_backup_label: unable to parse \"START WAL LOCATION\" in backup label\n"));
exit(ERR_BAD_BACKUP_LABEL);
}
out_backup_label->start_wal_location =
parse_label_lsn(&label_key[0], start_wal_location);
(void) strncpy(out_backup_label->start_wal_file, wal_filename, MAXLEN);
out_backup_label->start_wal_file[MAXLEN-1] = '\0';
}
else if (strcmp(label_key, "CHECKPOINT LOCATION") == 0)
{
out_backup_label->checkpoint_location =
parse_label_lsn(&label_key[0], &label_value[0]);
}
else if (strcmp(label_key, "BACKUP METHOD") == 0)
{
(void) strncpy(out_backup_label->backup_method, label_value, MAXLEN);
out_backup_label->backup_method[MAXLEN-1] = '\0';
}
else if (strcmp(label_key, "BACKUP FROM") == 0)
{
(void) strncpy(out_backup_label->backup_from, label_value, MAXLEN);
out_backup_label->backup_from[MAXLEN-1] = '\0';
}
else if (strcmp(label_key, "START TIME") == 0)
{
(void) strncpy(out_backup_label->start_time, label_value, MAXLEN);
out_backup_label->start_time[MAXLEN-1] = '\0';
}
else if (strcmp(label_key, "LABEL") == 0)
{
(void) strncpy(out_backup_label->label, label_value, MAXLEN);
out_backup_label->label[MAXLEN-1] = '\0';
}
else if (strcmp(label_key, "MIN FAILOVER SLOT LSN") == 0)
{
out_backup_label->min_failover_slot_lsn =
parse_label_lsn(&label_key[0], &label_value[0]);
}
else
{
log_info("read_backup_label: ignored unrecognised backup label entry \"%s: %s\"",
label_key, label_value);
}
}
(void) fclose(label_file);
}
static void
do_standby_promote(void)
@@ -2455,7 +2234,7 @@ do_standby_follow(void)
int r,
retval;
char data_dir[MAXPGPATH];
char data_dir[MAXFILENAME];
bool success;
@@ -2538,7 +2317,7 @@ do_standby_follow(void)
master_id = get_master_node_id(master_conn, options.cluster_name);
strncpy(data_dir, runtime_options.dest_dir, MAXPGPATH);
strncpy(data_dir, runtime_options.dest_dir, MAXFILENAME);
}
@@ -2570,9 +2349,7 @@ do_standby_follow(void)
if (options.use_replication_slots)
{
int server_version_num = get_server_version(master_conn, NULL);
if (create_replication_slot(master_conn, repmgr_slot_name, server_version_num) == false)
if (create_replication_slot(master_conn, repmgr_slot_name) == false)
{
PQExpBufferData event_details;
initPQExpBuffer(&event_details);
@@ -3077,8 +2854,8 @@ do_standby_switchover(void)
*/
maxlen_snprintf(command,
"%s -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
make_pg_path("pg_ctl"),
"%s/pg_ctl -D %s -m %s -W stop >/dev/null 2>&1 && echo 1 || echo 0",
pg_bindir,
remote_data_directory,
runtime_options.pg_ctl_mode);
@@ -3520,17 +3297,10 @@ do_standby_restore_config(void)
}
while ((arcdir_ent = readdir(arcdir)) != NULL) {
struct stat statbuf;
char arcdir_ent_path[MAXPGPATH];
PQExpBufferData src_file;
PQExpBufferData dst_file;
snprintf(arcdir_ent_path, MAXPGPATH,
"%s/%s",
runtime_options.config_archive_dir,
arcdir_ent->d_name);
if (stat(arcdir_ent_path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
if (arcdir_ent->d_type != DT_REG)
{
continue;
}
@@ -3994,8 +3764,7 @@ do_witness_create(void)
options.node_name,
options.conninfo,
options.priority,
NULL,
true);
NULL);
if (record_created == false)
{
@@ -4012,7 +3781,7 @@ do_witness_create(void)
/* copy configuration from master, only repl_nodes is needed */
if (!witness_copy_node_records(masterconn, witnessconn, options.cluster_name))
if (!copy_configuration(masterconn, witnessconn, options.cluster_name))
{
create_event_record(masterconn,
&options,
@@ -4273,7 +4042,6 @@ test_ssh_connection(char *host, char *remote_user)
return r;
}
static int
copy_remote_files(char *host, char *remote_user, char *remote_path,
char *local_path, bool is_directory, int server_version_num)
@@ -4318,9 +4086,6 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
* See function 'sendDir()' in 'src/backend/replication/basebackup.c' -
* we're basically simulating what pg_basebackup does, but with rsync rather
* than the BASEBACKUP replication protocol command.
*
* *However* currently we'll always copy the contents of the 'pg_replslot'
* directory and delete later if appropriate.
*/
if (is_directory)
{
@@ -4349,6 +4114,12 @@ copy_remote_files(char *host, char *remote_user, char *remote_path,
appendPQExpBuffer(&rsync_flags, "%s",
" --exclude=pg_xlog/* --exclude=pg_log/* --exclude=pg_stat_tmp/*");
if (server_version_num >= 90400)
{
appendPQExpBuffer(&rsync_flags, "%s",
" --exclude=pg_replslot/*");
}
maxlen_snprintf(script, "rsync %s %s:%s/* %s",
rsync_flags.data, host_string, remote_path, local_path);
}
@@ -4707,7 +4478,7 @@ create_schema(PGconn *conn)
"CREATE TABLE %s.repl_nodes ( "
" id INTEGER PRIMARY KEY, "
" type TEXT NOT NULL CHECK (type IN('master','standby','witness')), "
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id) DEFERRABLE, "
" upstream_node_id INTEGER NULL REFERENCES %s.repl_nodes (id), "
" cluster TEXT NOT NULL, "
" name TEXT NOT NULL, "
" conninfo TEXT NOT NULL, "
@@ -5074,45 +4845,22 @@ check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error)
}
else
{
char *levels_pre96[] = {
char *levels[] = {
"hot_standby",
"logical",
NULL,
};
/*
* Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases
* for "replica", but current_setting() will of course always return "replica"
*/
char *levels_96plus[] = {
"replica",
"logical",
NULL,
};
char **levels;
int j = 0;
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
if (server_version_num < 90600)
{
levels = (char **)levels_pre96;
wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'");
}
else
{
levels = (char **)levels_96plus;
wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'");
}
do
for(; j < 2; j++)
{
i = guc_set(conn, "wal_level", "=", levels[j]);
if (i)
{
break;
}
j++;
} while (levels[j] != NULL);
}
}
if (i == 0 || i == -1)

View File

@@ -15,21 +15,21 @@
# schema (pattern: "repmgr_{cluster}"); while this name will be quoted
# to preserve case, we recommend using lower case and avoiding whitespace
# to facilitate easier querying of the repmgr views and tables.
#cluster=example_cluster
cluster=example_cluster
# Node ID and name
# (Note: we recommend to avoid naming nodes after their initial
# replication funcion, as this will cause confusion when e.g.
# "standby2" is promoted to primary)
#node=2 # a unique integer
#node_name=node2 # an arbitrary (but unique) string; we recommend using
node=2 # a unique integer
node_name=node2 # an arbitrary (but unique) string; we recommend using
# the server's hostname or another identifier unambiguously
# associated with the server to avoid confusion
# Database connection information as a conninfo string
# This must be accessible to all servers in the cluster; for details see:
# http://www.postgresql.org/docs/current/static/libpq-connect.html#LIBPQ-CONNSTRING
#conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
conninfo='host=192.168.204.104 dbname=repmgr_db user=repmgr_usr'
# Optional configuration items
# ============================
@@ -37,16 +37,15 @@
# Replication settings
# ---------------------
# When using cascading replication, a standby can connect to another
# upstream standby node which is specified by setting 'upstream_node'.
# In that case, the upstream node must exist before the new standby
# can be registered. If 'upstream_node' is not set, then the standby
# will connect directly to the primary node.
#upstream_node=1
# when using cascading replication and a standby is to be connected to an
# upstream standby, specify that node's ID with 'upstream_node'. The node
# must exist before the new standby can be registered. If a standby is
# to connect directly to a primary node, this parameter is not required.
upstream_node=1
# use physical replication slots - PostgreSQL 9.4 and later only
# (default: 0)
#use_replication_slots=0
use_replication_slots=0
# NOTE: 'max_replication_slots' should be configured for at least the
# number of standbys which will connect to the primary.
@@ -56,15 +55,15 @@
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
# (default: NOTICE)
#loglevel=NOTICE
loglevel=NOTICE
# Logging facility: possible values are STDERR or - for Syslog integration - one of LOCAL0, LOCAL1, ..., LOCAL7, USER
# (default: STDERR)
#logfacility=STDERR
logfacility=STDERR
# stderr can be redirected to an arbitrary file:
#
#logfile='/var/log/repmgr/repmgr.log'
logfile='/var/log/repmgr/repmgr.log'
# event notifications can be passed to an arbitrary external program
# together with the following parameters:
@@ -78,12 +77,12 @@
# the values provided for "%t" and "%d" will probably contain spaces,
# so should be quoted in the provided command configuration, e.g.:
#
#event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
# By default, all notifications will be passed; the notification types
# can be filtered to explicitly named ones:
#
#event_notifications=master_register,standby_register,witness_create
event_notifications=master_register,standby_register,witness_create
# Environment/command settings
@@ -91,17 +90,17 @@
# path to PostgreSQL binary directory (location of pg_ctl, pg_basebackup etc.)
# (if not provided, defaults to system $PATH)
#pg_bindir=/usr/bin/
pg_bindir=/usr/bin/
# external command options
#rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
#ssh_options=-o "StrictHostKeyChecking no"
rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
ssh_options=-o "StrictHostKeyChecking no"
# external command arguments. Values shown are examples.
#pg_ctl_options='-s'
#pg_basebackup_options='--xlog-method=s'
pg_ctl_options='-s'
pg_basebackup_options='--xlog-method=s'
# Standby clone settings
@@ -123,30 +122,27 @@
# Number of seconds to wait for a response from the primary server before
# deciding it has failed.
#master_response_timeout=60
master_response_timeout=60
# Number of attempts at what interval (in seconds) to try and
# connect to a server to establish its status (e.g. master
# during failover)
#reconnect_attempts=6
#reconnect_interval=10
reconnect_attempts=6
reconnect_interval=10
# Autofailover options
#failover=manual # one of 'automatic', 'manual'
failover=manual # one of 'automatic', 'manual'
# (default: manual)
#priority=100 # a value of zero or less prevents the node being promoted to primary
priority=100 # a value of zero or less prevents the node being promoted to primary
# (default: 100)
#promote_command='repmgr standby promote -f /path/to/repmgr.conf'
#follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
promote_command='repmgr standby promote -f /path/to/repmgr.conf'
follow_command='repmgr standby follow -f /path/to/repmgr.conf -W'
# monitoring interval in seconds; default is 2
#monitor_interval_secs=2
monitor_interval_secs=2
# change wait time for primary; before we bail out and exit when the primary
# disappears, we wait 'reconnect_attempts' * 'retry_promote_interval_secs'
# seconds; by default this would be half an hour, as 'retry_promote_interval_secs'
# default value is 300)
#retry_promote_interval_secs=300
# Number of seconds after which the witness server resyncs the repl_nodes table
#witness_repl_nodes_sync_interval_secs=15
retry_promote_interval_secs=300

View File

@@ -32,6 +32,8 @@
#define MIN_SUPPORTED_VERSION "9.3"
#define MIN_SUPPORTED_VERSION_NUM 90300
#include "config.h"
#define MAXFILENAME 1024
#define ERRBUFF_SIZE 512
#define DEFAULT_WAL_KEEP_SEGMENTS "5000"
@@ -55,8 +57,8 @@ typedef struct
char dbname[MAXLEN];
char host[MAXLEN];
char username[MAXLEN];
char dest_dir[MAXPGPATH];
char config_file[MAXPGPATH];
char dest_dir[MAXFILENAME];
char config_file[MAXFILENAME];
char remote_user[MAXLEN];
char superuser[MAXLEN];
char wal_keep_segments[MAXLEN];
@@ -79,7 +81,7 @@ typedef struct
/* parameter used by STANDBY SWITCHOVER */
char remote_config_file[MAXLEN];
char pg_rewind[MAXPGPATH];
char pg_rewind[MAXFILENAME];
/* parameter used by STANDBY {ARCHIVE_CONFIG | RESTORE_CONFIG} */
char config_archive_dir[MAXLEN];
/* parameter used by CLUSTER CLEANUP */
@@ -96,18 +98,6 @@ typedef struct
#define T_RUNTIME_OPTIONS_INITIALIZER { "", "", "", "", "", "", "", DEFAULT_WAL_KEEP_SEGMENTS, false, false, false, false, false, false, false, false, false, "smart", "", "", "", "", "", 0, "", "", "", false }
struct BackupLabel
{
XLogRecPtr start_wal_location;
char start_wal_file[MAXLEN];
XLogRecPtr checkpoint_location;
char backup_from[MAXLEN];
char backup_method[MAXLEN];
char start_time[MAXLEN];
char label[MAXLEN];
XLogRecPtr min_failover_slot_lsn;
};
extern char repmgr_schema[MAXLEN];
extern bool config_file_found;

123
repmgrd.c
View File

@@ -274,14 +274,7 @@ main(int argc, char **argv)
/* Retrieve record for this node from the local database */
node_info = get_node_info(my_local_conn, local_options.cluster_name, local_options.node);
/*
* No node record found - exit gracefully
*
* Note: it's highly unlikely this situation will occur when starting
* repmgrd on a witness, unless someone goes to the trouble of
* deleting the node record from the previously copied table.
*/
/* No node record found - exit gracefully */
if (node_info.node_id == NODE_NOT_FOUND)
{
log_err(_("No metadata record found for this node - terminating\n"));
@@ -298,12 +291,9 @@ main(int argc, char **argv)
*/
do
{
/* Timer for repl_nodes synchronisation interval */
int sync_repl_nodes_elapsed = 0;
/*
* Set my server mode, establish a connection to master and start
* monitoring
* monitor
*/
switch (node_info.type)
@@ -403,8 +393,8 @@ main(int argc, char **argv)
local_options.cluster_name);
master_conn = get_master_connection(my_local_conn,
local_options.cluster_name,
&master_options.node, NULL);
local_options.cluster_name,
&master_options.node, NULL);
if (master_conn == NULL)
{
@@ -412,7 +402,8 @@ main(int argc, char **argv)
initPQExpBuffer(&errmsg);
appendPQExpBuffer(&errmsg,
_("unable to connect to master node"));
_("unable to connect to master node '%s'"),
master_options.node_name);
log_err("%s\n", errmsg.data);
@@ -475,24 +466,6 @@ main(int argc, char **argv)
sleep(local_options.monitor_interval_secs);
/*
* On a witness node, regularly resync the repl_nodes table
* to keep up with any changes on the primary
*
* TODO: only resync the table if changes actually detected
*/
if (node_info.type == WITNESS)
{
sync_repl_nodes_elapsed += local_options.monitor_interval_secs;
log_debug(_("seconds since last node record sync: %i (sync interval: %i)\n"), sync_repl_nodes_elapsed, local_options.witness_repl_nodes_sync_interval_secs);
if(sync_repl_nodes_elapsed >= local_options.witness_repl_nodes_sync_interval_secs)
{
log_debug(_("Resyncing repl_nodes table\n"));
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
sync_repl_nodes_elapsed = 0;
}
}
if (got_SIGHUP)
{
/*
@@ -507,7 +480,6 @@ main(int argc, char **argv)
}
got_SIGHUP = false;
}
if (failover_done)
{
log_debug(_("standby check loop will terminate\n"));
@@ -600,7 +572,7 @@ witness_monitor(void)
* XXX it would be neat to be able to handle this with e.g. table-based
* logical replication
*/
witness_copy_node_records(master_conn, my_local_conn, local_options.cluster_name);
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
break;
}
@@ -696,15 +668,15 @@ standby_monitor(void)
PGresult *res;
char monitor_standby_timestamp[MAXLEN];
char last_wal_master_location[MAXLEN];
char last_xlog_receive_location[MAXLEN];
char last_xlog_replay_location[MAXLEN];
char last_xact_replay_timestamp[MAXLEN];
bool last_xlog_receive_location_gte_replayed;
char last_wal_standby_received[MAXLEN];
char last_wal_standby_applied[MAXLEN];
char last_wal_standby_applied_timestamp[MAXLEN];
bool last_wal_standby_received_gte_replayed;
char sqlquery[QUERY_STR_LEN];
XLogRecPtr lsn_master_current_xlog_location;
XLogRecPtr lsn_last_xlog_receive_location;
XLogRecPtr lsn_last_xlog_replay_location;
XLogRecPtr lsn_master;
XLogRecPtr lsn_standby_received;
XLogRecPtr lsn_standby_applied;
int connection_retries,
ret;
@@ -998,10 +970,10 @@ standby_monitor(void)
}
strncpy(monitor_standby_timestamp, PQgetvalue(res, 0, 0), MAXLEN);
strncpy(last_xlog_receive_location, PQgetvalue(res, 0, 1), MAXLEN);
strncpy(last_xlog_replay_location, PQgetvalue(res, 0, 2), MAXLEN);
strncpy(last_xact_replay_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
last_xlog_receive_location_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
last_wal_standby_received_gte_replayed = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
? true
: false;
@@ -1010,13 +982,13 @@ standby_monitor(void)
/*
* In the unusual event of a standby becoming disconnected from the primary,
* while this repmgrd remains connected to the primary, subtracting
* "last_xlog_replay_location" from "lsn_last_xlog_receive_location" and coercing to
* "lsn_standby_applied" from "lsn_standby_received" and coercing to
* (long long unsigned int) will result in a meaningless, very large
* value which will overflow a BIGINT column and spew error messages into the
* PostgreSQL log. In the absence of a better strategy, skip attempting
* to insert a monitoring record.
*/
if (last_xlog_receive_location_gte_replayed == false)
if (last_wal_standby_received_gte_replayed == false)
{
log_verbose(LOG_WARNING,
"Invalid replication_lag value calculated - is this standby connected to its upstream?\n");
@@ -1038,40 +1010,29 @@ standby_monitor(void)
PQclear(res);
/* Calculate the lag */
lsn_master_current_xlog_location = lsn_to_xlogrecptr(last_wal_master_location, NULL);
lsn_last_xlog_receive_location = lsn_to_xlogrecptr(last_xlog_receive_location, NULL);
lsn_last_xlog_replay_location = lsn_to_xlogrecptr(last_xlog_replay_location, NULL);
lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
/*
* Build the SQL to execute on master
*/
sqlquery_snprintf(sqlquery,
"INSERT INTO %s.repl_monitor "
" (primary_node, "
" standby_node, "
" last_monitor_time, "
" last_apply_time, "
" last_wal_primary_location, "
" last_wal_standby_location, "
" replication_lag, "
" apply_lag ) "
" VALUES(%d, "
" %d, "
" '%s'::TIMESTAMP WITH TIME ZONE, "
" '%s'::TIMESTAMP WITH TIME ZONE, "
" '%s', "
" '%s', "
" %llu, "
" %llu) ",
" (primary_node, standby_node, "
" last_monitor_time, last_apply_time, "
" last_wal_primary_location, last_wal_standby_location, "
" replication_lag, apply_lag ) "
" VALUES(%d, %d, "
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
" '%s', '%s', "
" %llu, %llu) ",
get_repmgr_schema_quoted(master_conn),
master_options.node,
local_options.node,
monitor_standby_timestamp,
last_xact_replay_timestamp,
last_wal_master_location,
last_xlog_receive_location,
(long long unsigned int)(lsn_master_current_xlog_location - lsn_last_xlog_receive_location),
(long long unsigned int)(lsn_last_xlog_receive_location - lsn_last_xlog_replay_location));
master_options.node, local_options.node,
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
last_wal_master_location, last_wal_standby_received,
(long long unsigned int)(lsn_master - lsn_standby_received),
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
/*
* Execute the query asynchronously, but don't check for a result. We will
@@ -1109,7 +1070,7 @@ do_master_failover(void)
XLogRecPtr xlog_recptr;
bool lsn_format_ok;
char last_xlog_replay_location[MAXLEN];
char last_wal_standby_applied[MAXLEN];
PGconn *node_conn = NULL;
@@ -1292,8 +1253,8 @@ do_master_failover(void)
" considered as new master and exit.\n"),
PQerrorMessage(my_local_conn));
PQclear(res);
sprintf(last_xlog_replay_location, "'%X/%X'", 0, 0);
update_shared_memory(last_xlog_replay_location);
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
update_shared_memory(last_wal_standby_applied);
terminate(ERR_DB_QUERY);
}
/* write last location in shared memory */
@@ -1987,8 +1948,6 @@ check_node_configuration(void)
/* Adding the node */
log_info(_("adding node %d to cluster '%s'\n"),
local_options.node, local_options.cluster_name);
/* XXX use create_node_record() */
sqlquery_snprintf(sqlquery,
"INSERT INTO %s.repl_nodes"
" (id, cluster, name, conninfo, priority, witness) "
@@ -2110,7 +2069,7 @@ terminate(int retval)
static void
update_shared_memory(char *last_xlog_replay_location)
update_shared_memory(char *last_wal_standby_applied)
{
PGresult *res;
char sqlquery[QUERY_STR_LEN];
@@ -2118,7 +2077,7 @@ update_shared_memory(char *last_xlog_replay_location)
sprintf(sqlquery,
"SELECT %s.repmgr_update_standby_location('%s')",
get_repmgr_schema_quoted(my_local_conn),
last_xlog_replay_location);
last_wal_standby_applied);
/* If an error happens, just inform about that and continue */
res = PQexec(my_local_conn, sqlquery);

View File

@@ -1,31 +0,0 @@
/*
* Update a repmgr 3.1.1 installation to repmgr 3.1.2
* --------------------------------------------------
*
* This update is only required if repmgrd is being used in conjunction
* with a witness server.
*
* The new repmgr package should be installed first. Then
* carry out these steps:
*
* 1. (If repmgrd is used) stop any running repmgrd instances
* 2. On the master node, execute the SQL statement listed below
* 3. (If repmgrd is used) restart repmgrd
*/
/*
* If your repmgr installation is not included in your repmgr
* user's search path, please set the search path to the name
* of the repmgr schema to ensure objects are installed in
* the correct location.
*
* The repmgr schema is "repmgr_" + the cluster name defined in
* 'repmgr.conf'.
*/
-- SET search_path TO 'name_of_repmgr_schema';
BEGIN;
ALTER TABLE repl_nodes ALTER CONSTRAINT repl_nodes_upstream_node_id_fkey DEFERRABLE;
COMMIT;

View File

@@ -83,12 +83,7 @@ _PG_init(void)
* resources in repmgr_shmem_startup().
*/
RequestAddinShmemSpace(repmgr_memsize());
#if (PG_VERSION_NUM >= 90600)
RequestNamedLWLockTranche("repmgr", 1);
#else
RequestAddinLWLocks(1);
#endif
/*
* Install hooks.
@@ -133,11 +128,7 @@ repmgr_shmem_startup(void)
if (!found)
{
/* First time through ... */
#if (PG_VERSION_NUM >= 90600)
shared_state->lock = &(GetNamedLWLockTranche("repmgr"))->lock;
#else
shared_state->lock = LWLockAssign();
#endif
snprintf(shared_state->location,
sizeof(shared_state->location), "%X/%X", 0, 0);
}

View File

@@ -24,17 +24,12 @@
#include <stdlib.h>
#include "errcode.h"
#define QUERY_STR_LEN 8192
#define MAXLEN 1024
#define MAXLINELENGTH 4096
#define MAXVERSIONSTR 16
#define MAXCONNINFO 1024
/* Why? http://stackoverflow.com/a/5459929/398670 */
#define STR(x) CppAsString(x)
#define MAXLEN_STR STR(MAXLEN)
extern int
xsnprintf(char *str, size_t size, const char *format,...)

View File

@@ -1,6 +1,6 @@
#ifndef _VERSION_H_
#define _VERSION_H_
#define REPMGR_VERSION "3.2dev"
#define REPMGR_VERSION "3.1.1"
#endif