mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
46 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a8232337d8 | ||
|
|
c9eb1bfcc0 | ||
|
|
db552dfbc7 | ||
|
|
9732f78565 | ||
|
|
eb7dca2919 | ||
|
|
c113102926 | ||
|
|
ed6a167915 | ||
|
|
fbbe7afd61 | ||
|
|
ae1fc93e48 | ||
|
|
7b4ee80af2 | ||
|
|
0b8755e278 | ||
|
|
d3e1937808 | ||
|
|
871d6fdee3 | ||
|
|
c7dfe9e040 | ||
|
|
5c92a9e057 | ||
|
|
aa5f025738 | ||
|
|
5b91a2d409 | ||
|
|
596a19ee37 | ||
|
|
23ff83b3b4 | ||
|
|
ba1f6bee0d | ||
|
|
da9c8f2491 | ||
|
|
64035ef701 | ||
|
|
da3a5ab1dc | ||
|
|
9d301b4789 | ||
|
|
c070c649f7 | ||
|
|
3b823396eb | ||
|
|
c19e7f1025 | ||
|
|
e4b5a1e19f | ||
|
|
f96cc3b906 | ||
|
|
a481ca7ce2 | ||
|
|
32dc450a09 | ||
|
|
34dbf64f50 | ||
|
|
ea653a8dbc | ||
|
|
50894b6124 | ||
|
|
94e187c476 | ||
|
|
de6284ae79 | ||
|
|
c54045bcd8 | ||
|
|
c0a53471e1 | ||
|
|
2eec8b5d79 | ||
|
|
c11e92cf2a | ||
|
|
f294d09034 | ||
|
|
26c597ef5a | ||
|
|
b8efbb7a15 | ||
|
|
3044696c05 | ||
|
|
6dc1969ad5 | ||
|
|
cb41ef1733 |
25
HISTORY
25
HISTORY
@@ -1,3 +1,27 @@
|
||||
4.0.3 2018-02-
|
||||
repmgr: improve switchover handling when "pg_ctl" is used to control the
|
||||
server and logging output is not explicitly redirected (Ian)
|
||||
repmgr: improve switchover log messages and exit code when old primary could
|
||||
not be shut down cleanly (Ian)
|
||||
repmgr: check demotion candidate can make a replication connection to the
|
||||
promotion candidate before executing a switchover; GitHub #370 (Ian)
|
||||
repmgr: add check for sufficient walsenders/replication slots before executing
|
||||
a switchover; GitHub #371 (Ian)
|
||||
repmgr: add --dry-run mode to "repmgr standby follow"; GitHub #368 (Ian)
|
||||
repmgr: provide information about the primary node for "standby_register" and
|
||||
"standby_follow" event notifications; GitHub #375 (Ian)
|
||||
repmgr: add "standby_register_sync" event notification; GitHub #374 (Ian)
|
||||
repmgr: output any connection error messages in "cluster show"'s list of
|
||||
warnings; GitHub #369 (Ian)
|
||||
repmgr: ensure an inactive data directory can be deleted; GitHub #366 (Ian)
|
||||
repmgr: fix upstream node display in "repmgr node status"; GitHub #363 (fanf2)
|
||||
repmgr: improve/clarify documentation and update --help output for
|
||||
"primary unregister"; GitHub #373 (Ian)
|
||||
repmgr: fix parsing of "pg_basebackup_options"; GitHub #376 (Ian)
|
||||
repmgr: ensure "pg_subtrans" directory is created when cloning a standby in
|
||||
Barman mode (Ian)
|
||||
repmgr: fix primary node check in "witness register"; GitHub #377 (Ian)
|
||||
|
||||
4.0.2 2018-01-18
|
||||
repmgr: add missing -W option to getopt_long() invocation; GitHub #350 (Ian)
|
||||
repmgr: automatically create slot name if missing; GitHub #343 (Ian)
|
||||
@@ -21,7 +45,6 @@
|
||||
GitHub #344 (Ian)
|
||||
repmgr: delete any replication slots copied by pg_rewind; GitHub #334 (Ian)
|
||||
repmgr: fix configuration file sanity check; GitHub #342 (Ian)
|
||||
Improve event notification documentation (Ian)
|
||||
|
||||
4.0.0 2017-11-21
|
||||
Complete rewrite with many changes; for details see the repmgr 4.0.0 release
|
||||
|
||||
@@ -1,4 +1,2 @@
|
||||
/* config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Only build repmgr for BDR */
|
||||
#undef BDR_ONLY
|
||||
|
||||
152
configfile.c
152
configfile.c
@@ -671,7 +671,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
* Raise an error if a known parameter is provided with an empty
|
||||
* value. Currently there's no reason why empty parameters are needed;
|
||||
* if we want to accept those, we'd need to add stricter default
|
||||
* checking, as currently e.g. an empty `node` value will be converted
|
||||
* checking, as currently e.g. an empty `node_id` value will be converted
|
||||
* to '0'.
|
||||
*/
|
||||
if (known_parameter == true && !strlen(value))
|
||||
@@ -1600,31 +1600,109 @@ clear_event_notification_list(t_configuration_options *options)
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list)
|
||||
int
|
||||
parse_output_to_argv(const char *string, char ***argv_array)
|
||||
{
|
||||
int options_len = 0;
|
||||
char *options_string = NULL;
|
||||
char *options_string_ptr = NULL;
|
||||
int c = 1,
|
||||
argc_item = 1;
|
||||
char *argv_item = NULL;
|
||||
char **local_argv_array = NULL;
|
||||
ItemListCell *cell;
|
||||
|
||||
/*
|
||||
* Add parsed options to this list, then copy to an array to pass to
|
||||
* getopt
|
||||
*/
|
||||
static ItemList option_argv = {NULL, NULL};
|
||||
ItemList option_argv = {NULL, NULL};
|
||||
|
||||
char *argv_item = NULL;
|
||||
int c,
|
||||
argc_item = 1;
|
||||
options_len = strlen(string) + 1;
|
||||
options_string = pg_malloc0(options_len);
|
||||
options_string_ptr = options_string;
|
||||
|
||||
/* Copy the string before operating on it with strtok() */
|
||||
strncpy(options_string, string, options_len);
|
||||
|
||||
/* Extract arguments into a list and keep a count of the total */
|
||||
while ((argv_item = strtok(options_string_ptr, " ")) != NULL)
|
||||
{
|
||||
item_list_append(&option_argv, trim(argv_item));
|
||||
|
||||
argc_item++;
|
||||
|
||||
if (options_string_ptr != NULL)
|
||||
options_string_ptr = NULL;
|
||||
}
|
||||
|
||||
pfree(options_string);
|
||||
|
||||
/*
|
||||
* Array of argument values to pass to getopt_long - this will need to
|
||||
* include an empty string as the first value (normally this would be the
|
||||
* program name)
|
||||
*/
|
||||
local_argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2));
|
||||
|
||||
/* Insert a blank dummy program name at the start of the array */
|
||||
local_argv_array[0] = pg_malloc0(1);
|
||||
|
||||
/*
|
||||
* Copy the previously extracted arguments from our list to the array
|
||||
*/
|
||||
for (cell = option_argv.head; cell; cell = cell->next)
|
||||
{
|
||||
int argv_len = strlen(cell->string) + 1;
|
||||
|
||||
local_argv_array[c] = (char *)pg_malloc0(argv_len);
|
||||
|
||||
strncpy(local_argv_array[c], cell->string, argv_len);
|
||||
|
||||
c++;
|
||||
}
|
||||
|
||||
local_argv_array[c] = NULL;
|
||||
|
||||
item_list_free(&option_argv);
|
||||
|
||||
*argv_array = local_argv_array;
|
||||
|
||||
return argc_item;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
free_parsed_argv(char ***argv_array)
|
||||
{
|
||||
char **local_argv_array = *argv_array;
|
||||
int i = 0;
|
||||
|
||||
while (local_argv_array[i] != NULL)
|
||||
{
|
||||
pfree((char *)local_argv_array[i]);
|
||||
i++;
|
||||
}
|
||||
|
||||
pfree((char **)local_argv_array);
|
||||
*argv_array = NULL;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_options *backup_options, int server_version_num, ItemList *error_list)
|
||||
{
|
||||
bool backup_options_ok = true;
|
||||
|
||||
int c = 0,
|
||||
argc_item = 0;
|
||||
|
||||
char **argv_array = NULL;
|
||||
ItemListCell *cell = NULL;
|
||||
|
||||
int optindex = 0;
|
||||
|
||||
struct option *long_options = NULL;
|
||||
|
||||
bool backup_options_ok = true;
|
||||
|
||||
/* We're only interested in these options */
|
||||
static struct option long_options_9[] =
|
||||
@@ -1650,56 +1728,12 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
|
||||
if (!strlen(pg_basebackup_options))
|
||||
return backup_options_ok;
|
||||
|
||||
options_len = strlen(pg_basebackup_options) + 1;
|
||||
options_string = pg_malloc(options_len);
|
||||
options_string_ptr = options_string;
|
||||
|
||||
if (server_version_num >= 100000)
|
||||
long_options = long_options_10;
|
||||
else
|
||||
long_options = long_options_9;
|
||||
|
||||
/* Copy the string before operating on it with strtok() */
|
||||
strncpy(options_string, pg_basebackup_options, options_len);
|
||||
|
||||
/* Extract arguments into a list and keep a count of the total */
|
||||
while ((argv_item = strtok(options_string_ptr, " ")) != NULL)
|
||||
{
|
||||
item_list_append(&option_argv, argv_item);
|
||||
|
||||
argc_item++;
|
||||
|
||||
if (options_string_ptr != NULL)
|
||||
options_string_ptr = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Array of argument values to pass to getopt_long - this will need to
|
||||
* include an empty string as the first value (normally this would be the
|
||||
* program name)
|
||||
*/
|
||||
argv_array = pg_malloc0(sizeof(char *) * (argc_item + 2));
|
||||
|
||||
/* Insert a blank dummy program name at the start of the array */
|
||||
argv_array[0] = pg_malloc0(1);
|
||||
|
||||
c = 1;
|
||||
|
||||
/*
|
||||
* Copy the previously extracted arguments from our list to the array
|
||||
*/
|
||||
for (cell = option_argv.head; cell; cell = cell->next)
|
||||
{
|
||||
int argv_len = strlen(cell->string) + 1;
|
||||
|
||||
argv_array[c] = pg_malloc0(argv_len);
|
||||
|
||||
strncpy(argv_array[c], cell->string, argv_len);
|
||||
|
||||
c++;
|
||||
}
|
||||
|
||||
argv_array[c] = NULL;
|
||||
argc_item = parse_output_to_argv(pg_basebackup_options, &argv_array);
|
||||
|
||||
/* Reset getopt's optind variable */
|
||||
optind = 0;
|
||||
@@ -1743,15 +1777,7 @@ parse_pg_basebackup_options(const char *pg_basebackup_options, t_basebackup_opti
|
||||
backup_options_ok = false;
|
||||
}
|
||||
|
||||
pfree(options_string);
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < argc_item + 2; i++)
|
||||
pfree(argv_array[i]);
|
||||
}
|
||||
pfree(argv_array);
|
||||
free_parsed_argv(&argv_array);
|
||||
|
||||
return backup_options_ok;
|
||||
}
|
||||
|
||||
@@ -248,7 +248,6 @@ typedef struct
|
||||
}
|
||||
|
||||
|
||||
|
||||
void set_progname(const char *argv0);
|
||||
const char *progname(void);
|
||||
|
||||
@@ -263,12 +262,15 @@ int repmgr_atoi(const char *s,
|
||||
ItemList *error_list,
|
||||
int minval);
|
||||
|
||||
|
||||
bool parse_pg_basebackup_options(const char *pg_basebackup_options,
|
||||
t_basebackup_options *backup_options,
|
||||
int server_version_num,
|
||||
ItemList *error_list);
|
||||
|
||||
int parse_output_to_argv(const char *string, char ***argv_array);
|
||||
void free_parsed_argv(char ***argv_array);
|
||||
|
||||
|
||||
/* called by repmgr-client and repmgrd */
|
||||
void exit_with_cli_errors(ItemList *error_list);
|
||||
void print_item_list(ItemList *item_list);
|
||||
|
||||
38
configure
vendored
38
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0.2.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0.3.
|
||||
#
|
||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
#
|
||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='repmgr'
|
||||
PACKAGE_TARNAME='repmgr'
|
||||
PACKAGE_VERSION='4.0.2'
|
||||
PACKAGE_STRING='repmgr 4.0.2'
|
||||
PACKAGE_VERSION='4.0.3'
|
||||
PACKAGE_STRING='repmgr 4.0.3'
|
||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
||||
|
||||
@@ -633,7 +633,6 @@ SHELL'
|
||||
ac_subst_files=''
|
||||
ac_user_opts='
|
||||
enable_option_checking
|
||||
with_bdr_only
|
||||
'
|
||||
ac_precious_vars='build_alias
|
||||
host_alias
|
||||
@@ -1179,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures repmgr 4.0.2 to adapt to many kinds of systems.
|
||||
\`configure' configures repmgr 4.0.3 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1240,15 +1239,10 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of repmgr 4.0.2:";;
|
||||
short | recursive ) echo "Configuration of repmgr 4.0.3:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
Optional Packages:
|
||||
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
|
||||
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
|
||||
--with-bdr-only BDR-only build
|
||||
|
||||
Some influential environment variables:
|
||||
PG_CONFIG Location to find pg_config for target PostgreSQL (default PATH)
|
||||
|
||||
@@ -1319,7 +1313,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
repmgr configure 4.0.2
|
||||
repmgr configure 4.0.3
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -1338,7 +1332,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by repmgr $as_me 4.0.2, which was
|
||||
It was created by repmgr $as_me 4.0.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -1694,20 +1688,6 @@ ac_config_headers="$ac_config_headers config.h"
|
||||
|
||||
|
||||
|
||||
|
||||
# Check whether --with-bdr_only was given.
|
||||
if test "${with_bdr_only+set}" = set; then :
|
||||
withval=$with_bdr_only;
|
||||
fi
|
||||
|
||||
if test "x$with_bdr_only" != "x"; then :
|
||||
|
||||
$as_echo "#define BDR_ONLY \"1\"" >>confdefs.h
|
||||
|
||||
|
||||
fi
|
||||
|
||||
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
|
||||
$as_echo_n "checking for a sed that does not truncate output... " >&6; }
|
||||
if ${ac_cv_path_SED+:} false; then :
|
||||
@@ -2379,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by repmgr $as_me 4.0.2, which was
|
||||
This file was extended by repmgr $as_me 4.0.3, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -2442,7 +2422,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
repmgr config.status 4.0.2
|
||||
repmgr config.status 4.0.3
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
AC_INIT([repmgr], [4.0.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
AC_INIT([repmgr], [4.0.3], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||
|
||||
@@ -6,12 +6,6 @@ AC_CONFIG_HEADER(config.h)
|
||||
|
||||
AC_ARG_VAR([PG_CONFIG], [Location to find pg_config for target PostgreSQL (default PATH)])
|
||||
|
||||
AC_ARG_WITH([bdr_only], [AS_HELP_STRING([--with-bdr-only], [BDR-only build])])
|
||||
AS_IF([test "x$with_bdr_only" != "x"],
|
||||
[AC_DEFINE([BDR_ONLY], ["1"], [Only build repmgr for BDR])]
|
||||
)
|
||||
|
||||
|
||||
AC_PROG_SED
|
||||
|
||||
if test -z "$PG_CONFIG"; then
|
||||
|
||||
169
dbutils.c
169
dbutils.c
@@ -437,15 +437,18 @@ free_conninfo_params(t_conninfo_param_list *param_list)
|
||||
|
||||
for (c = 0; c < param_list->size; c++)
|
||||
{
|
||||
if (param_list->keywords[c] != NULL)
|
||||
if (param_list->keywords != NULL && param_list->keywords[c] != NULL)
|
||||
pfree(param_list->keywords[c]);
|
||||
|
||||
if (param_list->values[c] != NULL)
|
||||
if (param_list->values != NULL && param_list->values[c] != NULL)
|
||||
pfree(param_list->values[c]);
|
||||
}
|
||||
|
||||
pfree(param_list->keywords);
|
||||
pfree(param_list->values);
|
||||
if (param_list->keywords != NULL)
|
||||
pfree(param_list->keywords);
|
||||
|
||||
if (param_list->values != NULL)
|
||||
pfree(param_list->values);
|
||||
}
|
||||
|
||||
|
||||
@@ -1255,7 +1258,7 @@ get_primary_node_id(PGconn *conn)
|
||||
initPQExpBuffer(&query);
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT node_id "
|
||||
" FROM repmgr.nodes "
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE type = 'primary' "
|
||||
" AND active IS TRUE ");
|
||||
|
||||
@@ -1866,8 +1869,8 @@ get_node_record(PGconn *conn, int node_id, t_node_info *node_info)
|
||||
initPQExpBuffer(&query);
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE node_id = %i",
|
||||
" FROM repmgr.nodes n "
|
||||
" WHERE n.node_id = %i",
|
||||
node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record():\n %s", query.data);
|
||||
@@ -1894,8 +1897,8 @@ get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_i
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE node_name = '%s' ",
|
||||
" FROM repmgr.nodes n "
|
||||
" WHERE n.node_name = '%s' ",
|
||||
node_name);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_record_by_name():\n %s", query.data);
|
||||
@@ -2020,8 +2023,8 @@ get_all_node_records(PGconn *conn, NodeInfoList *node_list)
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
"ORDER BY node_id ");
|
||||
" FROM repmgr.nodes n "
|
||||
"ORDER BY n.node_id ");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_all_node_records():\n%s", query.data);
|
||||
|
||||
@@ -2046,9 +2049,9 @@ get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *node_list)
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE upstream_node_id = %i "
|
||||
"ORDER BY node_id ",
|
||||
" FROM repmgr.nodes n "
|
||||
" WHERE n.upstream_node_id = %i "
|
||||
"ORDER BY n.node_id ",
|
||||
node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_downstream_node_records():\n%s", query.data);
|
||||
@@ -2075,11 +2078,11 @@ get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id,
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE upstream_node_id = %i "
|
||||
" AND node_id != %i "
|
||||
" AND active IS TRUE "
|
||||
"ORDER BY node_id ",
|
||||
" FROM repmgr.nodes n "
|
||||
" WHERE n.upstream_node_id = %i "
|
||||
" AND n.node_id != %i "
|
||||
" AND n.active IS TRUE "
|
||||
"ORDER BY n.node_id ",
|
||||
upstream_node_id,
|
||||
node_id);
|
||||
|
||||
@@ -2107,8 +2110,8 @@ get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list)
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes "
|
||||
"ORDER BY priority DESC, node_name ");
|
||||
" FROM repmgr.nodes n "
|
||||
"ORDER BY n.priority DESC, n.node_name ");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_node_records_by_priority():\n%s", query.data);
|
||||
|
||||
@@ -2123,7 +2126,11 @@ get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list)
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
/*
|
||||
* return all node records together with their upstream's node name,
|
||||
* if available.
|
||||
*/
|
||||
bool
|
||||
get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
@@ -2145,15 +2152,61 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to retrieve node records"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
_populate_node_records(res, node_list);
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool
|
||||
get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *node_list)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT " REPMGR_NODES_COLUMNS
|
||||
" FROM repmgr.nodes n "
|
||||
"LEFT JOIN pg_catalog.pg_replication_slots rs "
|
||||
" ON rs.slot_name = n.node_name "
|
||||
" WHERE rs.slot_name IS NULL "
|
||||
" AND n.node_id != %i ",
|
||||
this_node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_all_node_records_with_missing_slot():\n%s", query.data);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to retrieve node records"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
_populate_node_records(res, node_list);
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info)
|
||||
{
|
||||
@@ -2271,9 +2324,11 @@ _create_update_node_record(PGconn *conn, char *action, t_node_info *node_info)
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("unable to %s node record:\n %s"),
|
||||
log_error(_("unable to %s node record for node \"%s\" (ID: %i)"),
|
||||
action,
|
||||
PQerrorMessage(conn));
|
||||
node_info->node_name,
|
||||
node_info->node_id);
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
@@ -2633,6 +2688,11 @@ get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *no
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
|
||||
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||
server_version_num = get_server_version(conn, NULL);
|
||||
|
||||
Assert(server_version_num != UNKNOWN_SERVER_VERSION_NUM);
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
@@ -2653,8 +2713,8 @@ get_node_replication_stats(PGconn *conn, int server_version_num, t_node_info *no
|
||||
appendPQExpBuffer(&query,
|
||||
" current_setting('max_replication_slots')::INT AS max_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots) AS total_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = TRUE) AS active_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active = FALSE) AS inactive_replication_slots, ");
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active IS TRUE) AS active_replication_slots, "
|
||||
" (SELECT COUNT(*) FROM pg_catalog.pg_replication_slots WHERE active IS FALSE) AS inactive_replication_slots, ");
|
||||
}
|
||||
|
||||
|
||||
@@ -3251,14 +3311,14 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
/* %p: former primary id ("repmgr standby switchover") */
|
||||
/* %p: primary id ("standby_switchover": former primary id) */
|
||||
src_ptr++;
|
||||
if (event_info->former_primary_id != UNKNOWN_NODE_ID)
|
||||
if (event_info->node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
PQExpBufferData node_id;
|
||||
initPQExpBuffer(&node_id);
|
||||
appendPQExpBuffer(&node_id,
|
||||
"%i", event_info->former_primary_id);
|
||||
"%i", event_info->node_id);
|
||||
strlcpy(dst_ptr, node_id.data, end_ptr - dst_ptr);
|
||||
dst_ptr += strlen(dst_ptr);
|
||||
termPQExpBuffer(&node_id);
|
||||
@@ -3550,6 +3610,45 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
|
||||
return RECORD_FOUND;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_free_replication_slots(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
int free_slots = 0;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT pg_catalog.current_setting('max_replication_slots')::INT - "
|
||||
" COUNT(*) AS free_slots"
|
||||
" FROM pg_catalog.pg_replication_slots");
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute replication slot query"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (PQntuples(res) == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free_slots = atoi(PQgetvalue(res, 0, 0));
|
||||
|
||||
PQclear(res);
|
||||
return free_slots;
|
||||
}
|
||||
|
||||
|
||||
/* ==================== */
|
||||
/* tablespace functions */
|
||||
/* ==================== */
|
||||
@@ -4255,8 +4354,8 @@ is_bdr_repmgr(PGconn *conn)
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT COUNT(*)"
|
||||
" FROM repmgr.nodes"
|
||||
" WHERE type != 'bdr' ");
|
||||
" FROM repmgr.nodes n"
|
||||
" WHERE n.type != 'bdr' ");
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
@@ -4425,9 +4524,9 @@ get_bdr_other_node_name(PGconn *conn, int node_id, char *node_name)
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT node_name "
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE node_id != %i",
|
||||
" SELECT n.node_name "
|
||||
" FROM repmgr.nodes n "
|
||||
" WHERE n.node_id != %i",
|
||||
node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_bdr_other_node_name():\n %s", query.data);
|
||||
|
||||
16
dbutils.h
16
dbutils.h
@@ -28,7 +28,7 @@
|
||||
#include "strutil.h"
|
||||
#include "voting.h"
|
||||
|
||||
#define REPMGR_NODES_COLUMNS "node_id, type, upstream_node_id, node_name, conninfo, repluser, slot_name, location, priority, active, config_file, '' AS upstream_node_name "
|
||||
#define REPMGR_NODES_COLUMNS "n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, n.slot_name, n.location, n.priority, n.active, n.config_file, '' AS upstream_node_name "
|
||||
#define BDR_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_status, node_name, node_local_dsn, node_init_from_dsn, node_read_only, node_seq_id"
|
||||
|
||||
#define ERRBUFF_SIZE 512
|
||||
@@ -79,6 +79,14 @@ typedef enum
|
||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
||||
} NodeStatus;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
CONN_UNKNOWN = -1,
|
||||
CONN_OK,
|
||||
CONN_BAD,
|
||||
CONN_ERROR
|
||||
} ConnectionStatus;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
SLOT_UNKNOWN = -1,
|
||||
@@ -175,7 +183,7 @@ typedef struct s_event_info
|
||||
{
|
||||
char *node_name;
|
||||
char *conninfo_str;
|
||||
int former_primary_id;
|
||||
int node_id;
|
||||
} t_event_info;
|
||||
|
||||
#define T_EVENT_INFO_INITIALIZER { \
|
||||
@@ -410,7 +418,8 @@ void get_all_node_records(PGconn *conn, NodeInfoList *node_list);
|
||||
void get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes);
|
||||
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
||||
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
||||
void get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||
bool get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
||||
|
||||
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||
@@ -445,6 +454,7 @@ void create_slot_name(char *slot_name, int node_id);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
int get_free_replication_slots(PGconn *conn);
|
||||
|
||||
/* tablespace functions */
|
||||
bool get_tablespace_name_by_location(PGconn *conn, const char *location, char *name);
|
||||
|
||||
198
dirutil.c
198
dirutil.c
@@ -21,6 +21,7 @@
|
||||
|
||||
#include <unistd.h>
|
||||
#include <dirent.h>
|
||||
#include <signal.h>
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
@@ -34,34 +35,33 @@
|
||||
#include "dirutil.h"
|
||||
#include "strutil.h"
|
||||
#include "log.h"
|
||||
#include "controldata.h"
|
||||
|
||||
static int unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf);
|
||||
|
||||
/* PID can be negative if backend is standalone */
|
||||
typedef long pgpid_t;
|
||||
|
||||
|
||||
/*
|
||||
* make sure the directory either doesn't exist or is empty
|
||||
* we use this function to check the new data directory and
|
||||
* the directories for tablespaces
|
||||
* Check if a directory exists, and if so whether it is empty.
|
||||
*
|
||||
* This is the same check initdb does on the new PGDATA dir
|
||||
*
|
||||
* Returns 0 if nonexistent, 1 if exists and empty, 2 if not empty,
|
||||
* or -1 if trouble accessing directory
|
||||
* This function is used for checking both the data directory
|
||||
* and tablespace directories.
|
||||
*/
|
||||
int
|
||||
DataDirState
|
||||
check_dir(char *path)
|
||||
{
|
||||
DIR *chkdir;
|
||||
struct dirent *file;
|
||||
int result = 1;
|
||||
DIR *chkdir = NULL;
|
||||
struct dirent *file = NULL;
|
||||
int result = DIR_EMPTY;
|
||||
|
||||
errno = 0;
|
||||
|
||||
chkdir = opendir(path);
|
||||
|
||||
if (!chkdir)
|
||||
return (errno == ENOENT) ? 0 : -1;
|
||||
return (errno == ENOENT) ? DIR_NOENT : DIR_ERROR;
|
||||
|
||||
while ((file = readdir(chkdir)) != NULL)
|
||||
{
|
||||
@@ -73,25 +73,15 @@ check_dir(char *path)
|
||||
}
|
||||
else
|
||||
{
|
||||
result = 2; /* not empty */
|
||||
result = DIR_NOT_EMPTY;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
/*
|
||||
* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but not in
|
||||
* released version
|
||||
*/
|
||||
if (GetLastError() == ERROR_NO_MORE_FILES)
|
||||
errno = 0;
|
||||
#endif
|
||||
|
||||
closedir(chkdir);
|
||||
|
||||
if (errno != 0)
|
||||
return -1; /* some kind of I/O error? */
|
||||
return DIR_ERROR; /* some kind of I/O error? */
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -106,12 +96,13 @@ create_dir(char *path)
|
||||
if (mkdir_p(path, 0700) == 0)
|
||||
return true;
|
||||
|
||||
log_error(_("unable to create directory \"%s\": %s"),
|
||||
path, strerror(errno));
|
||||
log_error(_("unable to create directory \"%s\""), path);
|
||||
log_detail("%s", strerror(errno));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
set_dir_permissions(char *path)
|
||||
{
|
||||
@@ -146,26 +137,6 @@ mkdir_p(char *path, mode_t omode)
|
||||
oumask = 0;
|
||||
retval = 0;
|
||||
|
||||
#ifdef WIN32
|
||||
/* skip network and drive specifiers for win32 */
|
||||
if (strlen(p) >= 2)
|
||||
{
|
||||
if (p[0] == '/' && p[1] == '/')
|
||||
{
|
||||
/* network drive */
|
||||
p = strstr(p + 2, "/");
|
||||
if (p == NULL)
|
||||
return 1;
|
||||
}
|
||||
else if (p[1] == ':' &&
|
||||
((p[0] >= 'a' && p[0] <= 'z') ||
|
||||
(p[0] >= 'A' && p[0] <= 'Z')))
|
||||
{
|
||||
/* local drive */
|
||||
p += 2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (p[0] == '/') /* Skip leading '/'. */
|
||||
++p;
|
||||
@@ -242,17 +213,91 @@ is_pg_dir(char *path)
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to determine if a PostgreSQL data directory is in use
|
||||
* by reading the pidfile. This is the same mechanism used by
|
||||
* "pg_ctl".
|
||||
*
|
||||
* This function will abort with appropriate log messages if a file error
|
||||
* is encountered, as the user will need to address the situation before
|
||||
* any further useful progress can be made.
|
||||
*/
|
||||
PgDirState
|
||||
is_pg_running(char *path)
|
||||
{
|
||||
long pid;
|
||||
FILE *pidf;
|
||||
|
||||
char pid_file[MAXPGPATH];
|
||||
|
||||
/* it's reasonable to assume the pidfile name will not change */
|
||||
snprintf(pid_file, MAXPGPATH, "%s/postmaster.pid", path);
|
||||
|
||||
pidf = fopen(pid_file, "r");
|
||||
if (pidf == NULL)
|
||||
{
|
||||
/*
|
||||
* No PID file - PostgreSQL shouldn't be running. From 9.3 (the
|
||||
* earliest version we care about) removal of the PID file will
|
||||
* cause the postmaster to shut down, so it's highly unlikely
|
||||
* that PostgreSQL will still be running.
|
||||
*/
|
||||
if (errno == ENOENT)
|
||||
{
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("unable to open PostgreSQL PID file \"%s\""), pid_file);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* In the unlikely event we're unable to extract a PID from the PID file,
|
||||
* log a warning but assume we're not dealing with a running instance
|
||||
* as PostgreSQL should have shut itself down in these cases anyway.
|
||||
*/
|
||||
if (fscanf(pidf, "%ld", &pid) != 1)
|
||||
{
|
||||
/* Is the file empty? */
|
||||
if (ftell(pidf) == 0 && feof(pidf))
|
||||
{
|
||||
log_warning(_("PostgreSQL PID file \"%s\" is empty"), path);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("invalid data in PostgreSQL PID file \"%s\""), path);
|
||||
}
|
||||
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
}
|
||||
|
||||
fclose(pidf);
|
||||
|
||||
if (pid == getpid())
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
|
||||
if (pid == getppid())
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
|
||||
if (kill(pid, 0) == 0)
|
||||
return PG_DIR_RUNNING;
|
||||
|
||||
return PG_DIR_NOT_RUNNING;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
create_pg_dir(char *path, bool force)
|
||||
{
|
||||
bool pg_dir = false;
|
||||
|
||||
/* Check this directory could be used as a PGDATA dir */
|
||||
/* Check this directory can be used as a PGDATA dir */
|
||||
switch (check_dir(path))
|
||||
{
|
||||
case 0:
|
||||
/* dir not there, must create it */
|
||||
case DIR_NOENT:
|
||||
/* directory does not exist, attempt to create it */
|
||||
log_info(_("creating directory \"%s\"..."), path);
|
||||
|
||||
if (!create_dir(path))
|
||||
@@ -262,52 +307,51 @@ create_pg_dir(char *path, bool force)
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
/* Present but empty, fix permissions and use it */
|
||||
log_info(_("checking and correcting permissions on existing directory %s"),
|
||||
case DIR_EMPTY:
|
||||
/* exists but empty, fix permissions and use it */
|
||||
log_info(_("checking and correcting permissions on existing directory \"%s\""),
|
||||
path);
|
||||
|
||||
if (!set_dir_permissions(path))
|
||||
{
|
||||
log_error(_("unable to change permissions of directory \"%s\":\n %s"),
|
||||
path, strerror(errno));
|
||||
log_error(_("unable to change permissions of directory \"%s\""), path);
|
||||
log_detail("%s", strerror(errno));
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
/* Present and not empty */
|
||||
case DIR_NOT_EMPTY:
|
||||
/* exists but is not empty */
|
||||
log_warning(_("directory \"%s\" exists but is not empty"),
|
||||
path);
|
||||
|
||||
pg_dir = is_pg_dir(path);
|
||||
|
||||
if (pg_dir && force)
|
||||
if (is_pg_dir(path))
|
||||
{
|
||||
/* TODO: check DB state, if not running overwrite */
|
||||
|
||||
if (false)
|
||||
if (force == true)
|
||||
{
|
||||
log_notice(_("deleting existing data directory \"%s\""), path);
|
||||
log_notice(_("-F/--force provided - deleting existing data directory \"%s\""), path);
|
||||
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||
return true;
|
||||
}
|
||||
/* Let it continue */
|
||||
break;
|
||||
}
|
||||
else if (pg_dir && !force)
|
||||
{
|
||||
log_hint(_("This looks like a PostgreSQL directory.\n"
|
||||
"If you are sure you want to clone here, "
|
||||
"please check there is no PostgreSQL server "
|
||||
"running and use the -F/--force option"));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
default:
|
||||
else
|
||||
{
|
||||
if (force == true)
|
||||
{
|
||||
log_notice(_("deleting existing directory \"%s\""), path);
|
||||
nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
case DIR_ERROR:
|
||||
log_error(_("could not access directory \"%s\": %s"),
|
||||
path, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
19
dirutil.h
19
dirutil.h
@@ -19,12 +19,29 @@
|
||||
#ifndef _DIRUTIL_H_
|
||||
#define _DIRUTIL_H_
|
||||
|
||||
typedef enum
|
||||
{
|
||||
DIR_ERROR = -1,
|
||||
DIR_NOENT,
|
||||
DIR_EMPTY,
|
||||
DIR_NOT_EMPTY
|
||||
} DataDirState;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
PG_DIR_ERROR = -1,
|
||||
PG_DIR_NOT_RUNNING,
|
||||
PG_DIR_RUNNING
|
||||
} PgDirState;
|
||||
|
||||
extern int mkdir_p(char *path, mode_t omode);
|
||||
extern bool set_dir_permissions(char *path);
|
||||
|
||||
extern int check_dir(char *path);
|
||||
extern DataDirState check_dir(char *path);
|
||||
extern bool create_dir(char *path);
|
||||
extern bool is_pg_dir(char *path);
|
||||
extern PgDirState is_pg_running(char *path);
|
||||
extern bool create_pg_dir(char *path, bool force);
|
||||
extern int rmdir_recursive(char *path);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -11,18 +11,150 @@
|
||||
before performing an upgrade, as there may be version-specific upgrade steps.
|
||||
</para>
|
||||
|
||||
|
||||
<para>
|
||||
See also: <xref linkend="upgrading-repmgr">
|
||||
</para>
|
||||
|
||||
<sect1 id="release-4.0.3">
|
||||
<title>Release 4.0.3</title>
|
||||
<para><emphasis>??? Feb ??, 2018</emphasis></para>
|
||||
|
||||
<para>
|
||||
&repmgr; 4.0.3 contains some bug fixes and a number of
|
||||
usability enhancements related to logging/diagnostics,
|
||||
event notifications and pre-action checks.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
improve <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
|
||||
behaviour when <command>pg_ctl</command> is used to control the server and logging output is
|
||||
not explicitly redirected
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
improve <command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
|
||||
log messages and provide new exit code <literal>ERR_SWITCHOVER_INCOMPLETE</literal> when old primary could
|
||||
not be shut down cleanly
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
add check to verify the demotion candidate can make a replication connection to the
|
||||
promotion candidate before executing a switchover (GitHub #370)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
add check for sufficient walsenders and replication slots on the promotion candidate before executing
|
||||
<command><link linkend="repmgr-standby-switchover">repmgr standby switchover</link></command>
|
||||
(GitHub #371)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
add --dry-run mode to <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
||||
(GitHub #368)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
add <literal>standby_register_sync</literal> event notification, which is fired when
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
|
||||
is run with the <option>--wait-sync</option> option and the new or updated standby node
|
||||
record has synchronised to the standby (GitHub #374)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
when running <command><link linkend="repmgr-cluster-show">repmgr cluster show</link></command>,
|
||||
if any node is unreachable, output the error message encountered in the list of warnings
|
||||
(GitHub #369)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
ensure an inactive data directory can be overwritten when
|
||||
cloning a standby (GitHub #366)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-node-status">repmgr node status</link></command>
|
||||
upstream node display fixed (GitHub #363)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-primary-unregister">repmgr primary unregister</link></command>:
|
||||
clarify usage and fix <literal>--help</literal> output (GitHub #373)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
parsing of <varname>pg_basebackup_options</varname> fixed (GitHub #376)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
ensure the <filename>pg_subtrans</filename> directory is created when cloning a
|
||||
standby in Barman mode
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-witness-register">repmgr witness register</link></command>:
|
||||
fix primary node check (GitHub #377).
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="release-4.0.2">
|
||||
<title>Release 4.0.2</title>
|
||||
<para><emphasis>Thu Jan 18, 2018</emphasis></para>
|
||||
|
||||
<para>
|
||||
repmgr 4.0.2 contains some bug fixes and minor usability enhancements.
|
||||
&repmgr; 4.0.2 contains some bug fixes and small usability enhancements.
|
||||
</para>
|
||||
<para>
|
||||
This release can be installed as a simple package upgrade from &repmgr; 4.0.1 or 4.0;
|
||||
<application>repmgrd</application> (if running) should be restarted.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
@@ -121,7 +253,7 @@
|
||||
<para><emphasis>Wed Dec 13, 2017</emphasis></para>
|
||||
|
||||
<para>
|
||||
repmgr 4.0.1 is a bugfix release.
|
||||
&repmgr; 4.0.1 is a bugfix release.
|
||||
</para>
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
<filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
<para>
|
||||
This parameter accepts the following format placeholders:
|
||||
The following format placeholders are provided for all event notifications:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
@@ -84,18 +84,8 @@
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%p</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
node ID of the demoted standby (<xref linkend="repmgr-standby-switchover"> only)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
The values provided for <literal>%t</literal> and <literal>%d</literal>
|
||||
will probably contain spaces, so should be quoted in the provided command
|
||||
@@ -104,34 +94,60 @@
|
||||
event_notification_command='/path/to/some/script %n %e %s "%t" "%d"'
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Additionally the following format placeholders are available for the event
|
||||
type <varname>bdr_failover</varname> and optionally <varname>bdr_recovery</varname>:
|
||||
The following parameters are provided for a subset of event notifications:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>%p</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
node ID of the current primary (<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
|
||||
</para>
|
||||
<para>
|
||||
node ID of the demoted primary (<xref linkend="repmgr-standby-switchover"> only)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%c</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
conninfo string of the next available node
|
||||
<literal>conninfo</literal> string of the primary node
|
||||
(<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
|
||||
</para>
|
||||
<para>
|
||||
<literal>conninfo</literal> string of the next available node
|
||||
(<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>%a</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
name of the next available node
|
||||
name of the current primary node (<xref linkend="repmgr-standby-register"> and <xref linkend="repmgr-standby-follow">)
|
||||
</para>
|
||||
<para>
|
||||
name of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
These should always be quoted.
|
||||
The values provided for <literal>%c</literal> and <literal>%a</literal>
|
||||
will probably contain spaces, so should always be quoted.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, all notification types will be passed to the designated script;
|
||||
the notification types can be filtered to explicitly named ones:
|
||||
the notification types can be filtered to explicitly named ones using the
|
||||
<varname>event_notifications</varname> parameter:
|
||||
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
@@ -144,6 +160,9 @@
|
||||
<listitem>
|
||||
<simpara><literal>standby_register</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_register_sync</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>standby_unregister</literal></simpara>
|
||||
</listitem>
|
||||
@@ -186,6 +205,18 @@
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_follow</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_disconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_upstream_reconnect</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_promote_error</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_failover</literal></simpara>
|
||||
</listitem>
|
||||
@@ -204,6 +235,7 @@
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Note that under some circumstances (e.g. when no replication cluster primary
|
||||
could be located), it will not be possible to write an entry into the
|
||||
|
||||
@@ -178,8 +178,8 @@
|
||||
<para>
|
||||
In order to effectively manage a replication cluster, &repmgr; needs to store
|
||||
information about the servers in the cluster in a dedicated database schema.
|
||||
This schema is automatically by the &repmgr; extension, which is installed
|
||||
during the first step in initialising a &repmgr;-administered cluster
|
||||
This schema is automatically created by the &repmgr; extension, which is installed
|
||||
during the first step in initializing a &repmgr;-administered cluster
|
||||
(<command><link linkend="repmgr-primary-register">repmgr primary register</link></command>)
|
||||
and contains the following objects:
|
||||
<variablelist>
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
<chapter id="repmgrd-bdr">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>BDR</secondary>
|
||||
</indexterm>
|
||||
|
||||
<indexterm>
|
||||
<primary>BDR</primary>
|
||||
</indexterm>
|
||||
|
||||
<title>BDR failover with repmgrd</title>
|
||||
<para>
|
||||
&repmgr; 4.x provides support for monitoring BDR nodes and taking action in
|
||||
case one of the nodes fails.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
Due to the nature of BDR, it's only safe to use this solution for
|
||||
a two-node scenario. Introducing additional nodes will create an inherent
|
||||
risk of node desynchronisation if a node goes down without being cleanly
|
||||
removed from the cluster.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
||||
with `repmgrd` and redirecting queries from the failed node to the remaining
|
||||
active node. This can be done by using an
|
||||
<link linkend="event-notifications">event notification</link> script
|
||||
which is called by <application>repmgrd</application> to dynamically
|
||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
||||
</para>
|
||||
|
||||
<sect1 id="prerequisites" xreflable="BDR prequisites">
|
||||
</sect1>
|
||||
</chapter>
|
||||
|
||||
@@ -21,6 +21,10 @@
|
||||
|
||||
<refsect1>
|
||||
<title>Execution</title>
|
||||
<para>
|
||||
<command>repmgr primary unregister</command> should be run on the current primary,
|
||||
with the ID of the node to unregister passed as <option>--node-id</option>.
|
||||
</para>
|
||||
<para>
|
||||
Execute with the <literal>--dry-run</literal> option to check what would happen without
|
||||
actually unregistering the node.
|
||||
@@ -32,6 +36,34 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
<title>Options</title>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually unregister the primary.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--node-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
ID of the inactive primary to be unregistered.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
|
||||
@@ -48,14 +48,53 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--dry-run</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Check prerequisites but don't actually follow the new primary.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
This does not guarantee the standby can follow the primary; in
|
||||
particular, whether the primary and standby timelines have diverged
|
||||
can currently only be determined by actually attempting to
|
||||
attach the standby to the primary.
|
||||
</para>
|
||||
</important>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-W</option></term>
|
||||
<term><option>--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for a primary to appear.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
<para>
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-node-rejoin">
|
||||
|
||||
@@ -57,16 +57,16 @@
|
||||
<refsect1 id="repmgr-standby-register-wait-sync" xreflabel="repmgr standby register --wait-sync">
|
||||
<title>Waiting for the registration to propagate to the standby</title>
|
||||
<para>
|
||||
Depending on your environment and workload, it may take some time for
|
||||
the standby's node record to propagate from the primary to the standby. Some
|
||||
actions (such as starting <application>repmgrd</application>) require that the standby's node record
|
||||
Depending on your environment and workload, it may take some time for the standby's node record
|
||||
to propagate from the primary to the standby. Some actions (such as starting
|
||||
<application>repmgrd</application>) require that the standby's node record
|
||||
is present and up-to-date to function correctly.
|
||||
</para>
|
||||
<para>
|
||||
By providing the option <literal>--wait-sync</literal> to the
|
||||
By providing the option <option>--wait-sync</option> to the
|
||||
<command>repmgr standby register</command> command, &repmgr; will wait
|
||||
until the record is synchronised before exiting. An optional timeout (in
|
||||
seconds) can be added to this option (e.g. <literal>--wait-sync=60</literal>).
|
||||
seconds) can be added to this option (e.g. <option>--wait-sync=60</option>).
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -75,20 +75,20 @@
|
||||
<para>
|
||||
Under some circumstances you may wish to register a standby which is not
|
||||
yet running; this can be the case when using provisioning tools to create
|
||||
a complex replication cluster. In this case, by using the <literal>-F/--force</literal>
|
||||
a complex replication cluster. In this case, by using the <option>-F/--force</option>
|
||||
option and providing the connection parameters to the primary server,
|
||||
the standby can be registered.
|
||||
</para>
|
||||
<para>
|
||||
Similarly, with cascading replication it may be necessary to register
|
||||
a standby whose upstream node has not yet been registered - in this case,
|
||||
using <literal>-F/--force</literal> will result in the creation of an inactive placeholder
|
||||
using <option>-F/--force</option> will result in the creation of an inactive placeholder
|
||||
record for the upstream node, which will however later need to be registered
|
||||
with the <literal>-F/--force</literal> option too.
|
||||
with the <option>-F/--force</option> option too.
|
||||
</para>
|
||||
<para>
|
||||
When used with <command>repmgr standby register</command>, care should be taken that use of the
|
||||
<literal>-F/--force</literal> option does not result in an incorrectly configured cluster.
|
||||
<option>-F/--force</option> option does not result in an incorrectly configured cluster.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
@@ -96,8 +96,21 @@
|
||||
<title>Event notifications</title>
|
||||
<para>
|
||||
A <literal>standby_register</literal> <link linkend="event-notifications">event notification</link>
|
||||
will be generated.
|
||||
will be generated immediately after the node record is updated on the primary.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If the <option>--wait-sync</option> option is provided, a <literal>standby_register_sync</literal>
|
||||
event notification will be generated immediately after the node record has synchronised to the
|
||||
standby.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
primary node, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
@@ -22,9 +22,19 @@
|
||||
</para>
|
||||
<para>
|
||||
If other standbys are connected to the demotion candidate, &repmgr; can instruct
|
||||
these to follow the new primary if the option <literal>--siblings-follow</literal>
|
||||
is specified.
|
||||
these to follow the new primary if the option <literal>--siblings-follow</literal>
|
||||
is specified. This requires a passwordless SSH connection between the promotion
|
||||
candidate (new primary) and the standbys attached to the demotion candidate
|
||||
(existing primary).
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Performing a switchover is a non-trivial operation. In particular it
|
||||
relies on the current primary being able to shut down cleanly and quickly.
|
||||
&repmgr; will attempt to check for potential issues but cannot guarantee
|
||||
a successful switchover.
|
||||
</para>
|
||||
</note>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -47,6 +57,13 @@
|
||||
<para>
|
||||
Check prerequisites but don't actually execute a switchover.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
Success of <option>--dry-run</option> does not imply the switchover will
|
||||
complete successfully, only that
|
||||
the prerequisites for performing the operation are met.
|
||||
</para>
|
||||
</important>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@@ -57,6 +74,12 @@
|
||||
<para>
|
||||
Ignore warnings and continue anyway.
|
||||
</para>
|
||||
<para>
|
||||
Specifically, if a problem is encountered when shutting down the current primary,
|
||||
using <option>-F/--force</option> will cause &repmgr; to continue by promoting
|
||||
the standby to be the new primary, and if <option>--siblings-follow</option> is
|
||||
specified, attach any other standbys to the new primary.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@@ -103,6 +126,11 @@
|
||||
<application>repmgrd</application> should not be active on any nodes while a switchover is being
|
||||
executed. This restriction may be lifted in a later version.
|
||||
</para>
|
||||
<para>
|
||||
External database connections, e.g. from an application, should not be permitted while
|
||||
the switchover is taking place. In particular, active transactions on the primary
|
||||
can potentially disrupt the shutdown process.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -115,10 +143,48 @@
|
||||
<para>
|
||||
If using an event notification script, <literal>standby_switchover</literal>
|
||||
will populate the placeholder parameter <literal>%p</literal> with the node ID of
|
||||
the former standby.
|
||||
the former primary.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
The following exit codes can be emitted by <literal>repmgr standby switchover</literal>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The switchover completed successfully.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_SWITCHOVER_FAIL (18)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The switchover could not be executed.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_SWITCHOVER_INCOMPLETE (22)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The switchover was executed but a problem was encountered.
|
||||
Typically this means the former primary could not be reattached
|
||||
as a standby.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
<para>
|
||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
||||
with `repmgrd` and redirecting queries from the failed node to the remaining
|
||||
with <application>repmgrd</application> and redirecting queries from the failed node to the remaining
|
||||
active node. This can be done by using an
|
||||
<link linkend="event-notifications">event notification</link> script
|
||||
which is called by <application>repmgrd</application> to dynamically
|
||||
@@ -174,17 +174,13 @@
|
||||
<para>
|
||||
Key to "failover" execution is the <literal>event_notification_command</literal>,
|
||||
which is a user-definable script specified in <filename>repmgr.conf</filename>
|
||||
and which should reconfigure the proxy server/ connection pooler to point
|
||||
to the other, still-active node.
|
||||
and which can use a &repmgr; <link linkend="event-notifications">event notification</link>
|
||||
to reconfigure the proxy server / connection pooler so it points to the other, still-active node.
|
||||
Details of the event will be passed as parameters to the script.
|
||||
</para>
|
||||
<para>
|
||||
Each time &repmgr; (or <application>repmgrd</application>) records an event,
|
||||
it can optionally execute the script defined in
|
||||
<literal>event_notification_command</literal> to take further action;
|
||||
details of the event will be passed as parameters.
|
||||
</para>
|
||||
<para>
|
||||
Following placeholders are available to the script:
|
||||
The following parameter placeholders are available for the script definition in <filename>repmgr.conf</filename>;
|
||||
these will be replaced with the appropriate value when the script is executed:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
@@ -231,20 +227,37 @@
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%c</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
conninfo string of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>%a</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
name of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
|
||||
<para>
|
||||
Note that <literal>%c</literal> and <literal>%a</literal> will only be provided during
|
||||
<varname>bdr_failover</varname> events, which is what is of interest here.
|
||||
Note that <literal>%c</literal> and <literal>%a</literal> are only provided with
|
||||
particular failover events, in this case <varname>bdr_failover</varname>.
|
||||
</para>
|
||||
<para>
|
||||
The provided sample script (`scripts/bdr-pgbouncer.sh`) is configured like
|
||||
this:
|
||||
The provided sample script
|
||||
(<literal><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/scripts/bdr-pgbouncer.sh">scripts/bdr-pgbouncer.sh</ulink></literal>)
|
||||
is configured as follows:
|
||||
<programlisting>
|
||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
and parses the configures parameters like this:
|
||||
and parses the placeholder parameters like this:
|
||||
<programlisting>
|
||||
NODE_ID=$1
|
||||
EVENT_TYPE=$2
|
||||
@@ -252,12 +265,14 @@
|
||||
NEXT_CONNINFO=$4
|
||||
NEXT_NODE_NAME=$5</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The script also contains some hard-coded values about the <application>PgBouncer</application>
|
||||
configuration for both nodes; these will need to be adjusted for your local environment
|
||||
(ideally the scripts would be maintained as templates and generated by some
|
||||
kind of provisioning system).
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
The sample script also contains some hard-coded values for the <application>PgBouncer</application>
|
||||
configuration for both nodes; these will need to be adjusted for your local environment
|
||||
(ideally the scripts would be maintained as templates and generated by some
|
||||
kind of provisioning system).
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
The script performs the following steps:
|
||||
|
||||
@@ -60,6 +60,13 @@
|
||||
&repmgr; being able to shut down the current primary server quickly and cleanly.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Ensure that a passwordless SSH connection is possible from the promotion candidate
|
||||
(standby) to the demotion candidate (current primary). If <literal>--siblings-follow</literal>
|
||||
will be used, ensure that passwordless SSH connections are possible from the
|
||||
promotion candidate to all standbys attached to the demotion candidate.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Double-check which commands will be used to stop/start/restart the current
|
||||
primary; on the primary execute:
|
||||
|
||||
@@ -11,22 +11,86 @@
|
||||
containing bugfixes and other minor improvements. Any substantial new
|
||||
functionality will be included in a feature release (e.g. 4.0.x to 4.1.x).
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; is implemented as a PostgreSQL extension; to upgrade it, first
|
||||
install the updated package (or compile the updated source), then in the
|
||||
database where the &repmgr; extension is installed, execute
|
||||
<command>ALTER EXTENSION repmgr UPDATE</command>.
|
||||
</para>
|
||||
<para>
|
||||
If <application>repmgrd</application> is running, it may be necessary to restart
|
||||
the PostgreSQL server if the upgrade contains changes to the shared object
|
||||
file used by <application>repmgrd</application>; check the release notes for details.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Please check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
<sect1 id="upgrading-repmgr-extension" xreflabel="Upgrading repmgr 4.x and later">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>repmgr 4.x and later</secondary>
|
||||
</indexterm>
|
||||
<title>Upgrading repmgr 4.x and later</title>
|
||||
<para>
|
||||
&repmgr; 4.x is implemented as a PostgreSQL extension; normally the upgrade consists
|
||||
of the following two steps:
|
||||
<orderedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
Install the updated package (or compile the updated source)
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
In the database where the &repmgr; extension is installed, execute
|
||||
<command>ALTER EXTENSION repmgr UPDATE</command>.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</orderedlist>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Always check the <link linkend="appendix-release-notes">release notes</link> for every
|
||||
release as they may contain upgrade instructions particular to individual versions.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If the <application>repmgrd</application> daemon is in use, we recommend stopping it
|
||||
before upgrading &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
Note that it may be necessary to restart the PostgreSQL server if the upgrade contains
|
||||
changes to the shared object file used by <application>repmgrd</application>; check the
|
||||
release notes for details.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="upgrading-and-pg-upgrade" xreflabel="pg_upgrade and repmgr">
|
||||
<indexterm>
|
||||
<primary>upgrading</primary>
|
||||
<secondary>pg_upgrade</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>pg_upgrade</primary>
|
||||
</indexterm>
|
||||
<title>pg_upgrade and repmgr</title>
|
||||
|
||||
<para>
|
||||
<application>pg_upgrade</application> requires that if any functions are
|
||||
dependent on a shared library, this library must be present in both
|
||||
the old and new installations before <application>pg_upgrade</application>
|
||||
can be executed.
|
||||
</para>
|
||||
<para>
|
||||
To minimize the risk of any upgrade issues (particularly if an upgrade to
|
||||
a new major &repmgr; version is involved), we recommend upgrading
|
||||
&repmgr; on the old server <emphasis>before</emphasis> running
|
||||
<application>pg_upgrade</application> to ensure that old and new
|
||||
versions are the same.
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
This issue applies to any PostgreSQL extension which has
|
||||
dependencies on a shared library.
|
||||
</simpara>
|
||||
</note>
|
||||
<para>
|
||||
For further details please see the <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade documentation</ulink>.
|
||||
</para>
|
||||
<para>
|
||||
If replication slots are in use, bear in mind these will <emphasis>not</emphasis>
|
||||
be recreated by <application>pg_upgrade</application>. These will need to
|
||||
be recreated manually.
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
|
||||
<sect1 id="upgrading-from-repmgr-3" xreflabel="Upgrading from repmgr 3.x">
|
||||
<indexterm>
|
||||
|
||||
@@ -1 +1 @@
|
||||
<!ENTITY repmgrversion "4.0.2">
|
||||
<!ENTITY repmgrversion "4.0.3">
|
||||
|
||||
@@ -43,5 +43,6 @@
|
||||
#define ERR_BARMAN 19
|
||||
#define ERR_REGISTRATION_SYNC 20
|
||||
#define ERR_OUT_OF_MEMORY 21
|
||||
#define ERR_SWITCHOVER_INCOMPLETE 22
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
@@ -82,6 +82,7 @@ do_cluster_show(void)
|
||||
NodeInfoListCell *cell = NULL;
|
||||
int i = 0;
|
||||
ItemList warnings = {NULL, NULL};
|
||||
bool success = false;
|
||||
|
||||
/* Connect to local database to obtain cluster connection data */
|
||||
log_verbose(LOG_INFO, _("connecting to database"));
|
||||
@@ -91,11 +92,19 @@ do_cluster_show(void)
|
||||
else
|
||||
conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||
|
||||
get_all_node_records_with_upstream(conn, &nodes);
|
||||
success = get_all_node_records_with_upstream(conn, &nodes);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
/* get_all_node_records_with_upstream() will print error message */
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (nodes.node_count == 0)
|
||||
{
|
||||
log_error(_("unable to retrieve any node records"));
|
||||
log_error(_("no node records were found"));
|
||||
log_hint(_("ensure at least one node is registered"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -131,8 +140,14 @@ do_cluster_show(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
char error[MAXLEN];
|
||||
|
||||
strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
|
||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||
item_list_append_format(&warnings,
|
||||
"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
|
||||
cell->node_info->node_name, cell->node_info->node_id, trim(error));
|
||||
}
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
@@ -158,15 +173,13 @@ do_cluster_show(void)
|
||||
break;
|
||||
case RECTYPE_STANDBY:
|
||||
appendPQExpBuffer(&details, "! running as standby");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is registered as primary but running as standby",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
break;
|
||||
case RECTYPE_UNKNOWN:
|
||||
appendPQExpBuffer(&details, "! unknown");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) has unknown replication status",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
break;
|
||||
@@ -177,16 +190,14 @@ do_cluster_show(void)
|
||||
if (cell->node_info->recovery_type == RECTYPE_PRIMARY)
|
||||
{
|
||||
appendPQExpBuffer(&details, "! running");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&details, "! running as standby");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is registered as an inactive primary but running as standby",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
@@ -199,8 +210,7 @@ do_cluster_show(void)
|
||||
if (cell->node_info->active == true)
|
||||
{
|
||||
appendPQExpBuffer(&details, "? unreachable");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is registered as an active primary but is unreachable",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
@@ -226,8 +236,7 @@ do_cluster_show(void)
|
||||
break;
|
||||
case RECTYPE_PRIMARY:
|
||||
appendPQExpBuffer(&details, "! running as primary");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is registered as standby but running as primary",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
break;
|
||||
@@ -245,16 +254,14 @@ do_cluster_show(void)
|
||||
if (cell->node_info->recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
appendPQExpBuffer(&details, "! running");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is running but the repmgr node record is inactive",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&details, "! running as primary");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is running as primary but the repmgr node record is inactive",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
@@ -267,8 +274,7 @@ do_cluster_show(void)
|
||||
if (cell->node_info->active == true)
|
||||
{
|
||||
appendPQExpBuffer(&details, "? unreachable");
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
"node \"%s\" (ID: %i) is registered as an active standby but is unreachable",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
@@ -416,7 +422,7 @@ do_cluster_show(void)
|
||||
printf(_("\nWARNING: following issues were detected\n"));
|
||||
for (cell = warnings.head; cell; cell = cell->next)
|
||||
{
|
||||
printf(_(" %s\n"), cell->string);
|
||||
printf(_(" - %s\n"), cell->string);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1144,7 +1150,7 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length)
|
||||
}
|
||||
else
|
||||
{
|
||||
t_conninfo_param_list remote_conninfo;
|
||||
t_conninfo_param_list remote_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
char *host = NULL;
|
||||
PQExpBufferData quoted_command;
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ static void _do_node_status_is_shutdown_cleanly(void);
|
||||
static void _do_node_archive_config(void);
|
||||
static void _do_node_restore_config(void);
|
||||
|
||||
static void do_node_check_replication_connection(void);
|
||||
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
@@ -249,8 +250,7 @@ do_node_status(void)
|
||||
if (node_info.max_wal_senders >= 0)
|
||||
{
|
||||
/* In CSV mode, raw values supplied as well */
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Replication connections",
|
||||
"%i (of maximal %i)",
|
||||
node_info.attached_wal_receivers,
|
||||
@@ -258,8 +258,7 @@ do_node_status(void)
|
||||
}
|
||||
else if (node_info.max_wal_senders == 0)
|
||||
{
|
||||
key_value_list_set_format(
|
||||
&node_status,
|
||||
key_value_list_set_format(&node_status,
|
||||
"Replication connections",
|
||||
"disabled");
|
||||
}
|
||||
@@ -276,8 +275,7 @@ do_node_status(void)
|
||||
|
||||
initPQExpBuffer(&slotinfo);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&slotinfo,
|
||||
appendPQExpBuffer(&slotinfo,
|
||||
"%i (of maximal %i)",
|
||||
node_info.active_replication_slots + node_info.inactive_replication_slots,
|
||||
node_info.max_replication_slots);
|
||||
@@ -289,8 +287,7 @@ do_node_status(void)
|
||||
"; %i inactive",
|
||||
node_info.inactive_replication_slots);
|
||||
|
||||
item_list_append_format(
|
||||
&warnings,
|
||||
item_list_append_format(&warnings,
|
||||
_("- node has %i inactive replication slots"),
|
||||
node_info.inactive_replication_slots);
|
||||
}
|
||||
@@ -309,13 +306,44 @@ do_node_status(void)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* check for missing replication slots - we do this regardless of
|
||||
* what "max_replication_slots" is set to
|
||||
*/
|
||||
|
||||
{
|
||||
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
||||
get_downsteam_nodes_with_missing_slot(conn,
|
||||
config_file_options.node_id,
|
||||
&missing_slots);
|
||||
|
||||
if (missing_slots.node_count > 0)
|
||||
{
|
||||
NodeInfoListCell *missing_slot_cell = NULL;
|
||||
|
||||
item_list_append_format(&warnings,
|
||||
_("- replication slots missing for following %i node(s):"),
|
||||
missing_slots.node_count);
|
||||
|
||||
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
||||
{
|
||||
item_list_append_format(&warnings,
|
||||
_(" - %s (ID: %i, slot name: \"%s\")"),
|
||||
missing_slot_cell->node_info->node_name,
|
||||
missing_slot_cell->node_info->node_id,
|
||||
missing_slot_cell->node_info->slot_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (node_info.type == STANDBY)
|
||||
{
|
||||
key_value_list_set_format(&node_status,
|
||||
"Upstream node",
|
||||
"%s (ID: %i)",
|
||||
node_info.node_name,
|
||||
node_info.node_id);
|
||||
node_info.upstream_node_name,
|
||||
node_info.upstream_node_id);
|
||||
|
||||
get_replication_info(conn, &replication_info);
|
||||
|
||||
@@ -463,8 +491,7 @@ _do_node_status_is_shutdown_cleanly(void)
|
||||
|
||||
initPQExpBuffer(&output);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&output,
|
||||
appendPQExpBuffer(&output,
|
||||
"--state=");
|
||||
|
||||
/* sanity-check we're dealing with a PostgreSQL directory */
|
||||
@@ -580,6 +607,11 @@ do_node_check(void)
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.replication_connection == true)
|
||||
{
|
||||
do_node_check_replication_connection();
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
if (strlen(config_file_options.conninfo))
|
||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
@@ -883,6 +915,67 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
do_node_check_replication_connection(void)
|
||||
{
|
||||
PGconn *local_conn = NULL;
|
||||
PGconn *repl_conn = NULL;
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
t_conninfo_param_list remote_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
PQExpBufferData output;
|
||||
|
||||
|
||||
initPQExpBuffer(&output);
|
||||
appendPQExpBuffer(&output,
|
||||
"--connection=");
|
||||
|
||||
if (runtime_options.remote_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
appendPQExpBuffer(&output, "UNKNOWN");
|
||||
printf("%s\n", output.data);
|
||||
termPQExpBuffer(&output);
|
||||
return;
|
||||
}
|
||||
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
|
||||
record_status = get_node_record(local_conn, runtime_options.remote_node_id, &node_record);
|
||||
PQfinish(local_conn);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
appendPQExpBuffer(&output, "UNKNOWN");
|
||||
printf("%s\n", output.data);
|
||||
termPQExpBuffer(&output);
|
||||
return;
|
||||
}
|
||||
|
||||
initialize_conninfo_params(&remote_conninfo, false);
|
||||
parse_conninfo_string(node_record.conninfo, &remote_conninfo, NULL, false);
|
||||
|
||||
param_set(&remote_conninfo, "replication", "1");
|
||||
param_set(&remote_conninfo, "user", node_record.repluser);
|
||||
|
||||
repl_conn = establish_db_connection_by_params(&remote_conninfo, false);
|
||||
|
||||
if (PQstatus(repl_conn) != CONNECTION_OK)
|
||||
{
|
||||
appendPQExpBuffer(&output, "BAD");
|
||||
printf("%s\n", output.data);
|
||||
termPQExpBuffer(&output);
|
||||
return;
|
||||
}
|
||||
|
||||
PQfinish(repl_conn);
|
||||
|
||||
appendPQExpBuffer(&output, "OK");
|
||||
printf("%s\n", output.data);
|
||||
termPQExpBuffer(&output);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static CheckStatus
|
||||
do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
|
||||
{
|
||||
@@ -1590,6 +1683,7 @@ do_node_rejoin(void)
|
||||
|
||||
bool success = true;
|
||||
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
||||
int follow_error_code = SUCCESS;
|
||||
|
||||
/* check node is not actually running */
|
||||
|
||||
@@ -1859,7 +1953,31 @@ do_node_rejoin(void)
|
||||
|
||||
success = do_standby_follow_internal(upstream_conn,
|
||||
&primary_node_record,
|
||||
&follow_output);
|
||||
&follow_output,
|
||||
&follow_error_code);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_notice(_("NODE REJOIN failed"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
create_event_notification(upstream_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"node_rejoin",
|
||||
success,
|
||||
follow_output.data);
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(follow_error_code);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX add checks that node actually started and connected to primary,
|
||||
* if not exit with ERR_REJOIN_FAIL
|
||||
*/
|
||||
|
||||
create_event_notification(upstream_conn,
|
||||
&config_file_options,
|
||||
@@ -1870,19 +1988,12 @@ do_node_rejoin(void)
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_notice(_("NODE REJOIN failed"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
log_notice(_("NODE REJOIN successful"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -548,7 +548,8 @@ do_primary_help(void)
|
||||
printf(_(" \"primary unregister\" unregisters an inactive primary node.\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check what would happen, but don't actually unregister the primary\n"));
|
||||
printf(_(" -F, --force force removal of the record\n"));
|
||||
printf(_(" --node-id ID of the inactive primary node to unregister.\n"));
|
||||
printf(_(" -F, --force force removal of an active record\n"));
|
||||
|
||||
puts("");
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -28,7 +28,7 @@ extern void do_standby_switchover(void);
|
||||
|
||||
extern void do_standby_help(void);
|
||||
|
||||
extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output);
|
||||
extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output, int *error_code);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -110,12 +110,12 @@ do_witness_register(void)
|
||||
}
|
||||
|
||||
/* check primary node's recovery type */
|
||||
recovery_type = get_recovery_type(witness_conn);
|
||||
recovery_type = get_recovery_type(primary_conn);
|
||||
|
||||
if (recovery_type == RECTYPE_STANDBY)
|
||||
{
|
||||
log_error(_("provided primary node is a standby"));
|
||||
log_error(_("provide the connection details of the cluster's primary server"));
|
||||
log_hint(_("provide the connection details of the cluster's primary server"));
|
||||
|
||||
PQfinish(witness_conn);
|
||||
PQfinish(primary_conn);
|
||||
|
||||
@@ -68,6 +68,7 @@ typedef struct
|
||||
int node_id;
|
||||
char node_name[MAXLEN];
|
||||
char data_dir[MAXPGPATH];
|
||||
int remote_node_id;
|
||||
|
||||
/* "standby clone" options */
|
||||
bool copy_external_config_files;
|
||||
@@ -103,6 +104,7 @@ typedef struct
|
||||
bool role;
|
||||
bool slots;
|
||||
bool has_passfile;
|
||||
bool replication_connection;
|
||||
|
||||
/* "node join" options */
|
||||
char config_files[MAXLEN];
|
||||
@@ -139,8 +141,8 @@ typedef struct
|
||||
"", "", "", "", \
|
||||
/* other connection options */ \
|
||||
"", "", \
|
||||
/* node options */ \
|
||||
UNKNOWN_NODE_ID, "", "", \
|
||||
/* general node options */ \
|
||||
UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
|
||||
/* "standby clone" options */ \
|
||||
false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
|
||||
false, \
|
||||
@@ -153,7 +155,7 @@ typedef struct
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
false, false, false, false, false, false, \
|
||||
false, false, false, false, false, false, false, \
|
||||
/* "node join" options */ \
|
||||
"", \
|
||||
/* "node service" options */ \
|
||||
@@ -179,6 +181,7 @@ typedef enum
|
||||
ACTION_NONE,
|
||||
ACTION_START,
|
||||
ACTION_STOP,
|
||||
ACTION_STOP_WAIT,
|
||||
ACTION_RESTART,
|
||||
ACTION_RELOAD,
|
||||
ACTION_PROMOTE
|
||||
|
||||
140
repmgr-client.c
140
repmgr-client.c
@@ -60,7 +60,6 @@
|
||||
#include "repmgr-action-witness.h"
|
||||
#include "repmgr-action-bdr.h"
|
||||
#include "repmgr-action-node.h"
|
||||
|
||||
#include "repmgr-action-cluster.h"
|
||||
|
||||
#include <storage/fd.h> /* for PG_TEMP_FILE_PREFIX */
|
||||
@@ -73,7 +72,7 @@ t_runtime_options runtime_options = T_RUNTIME_OPTIONS_INITIALIZER;
|
||||
t_configuration_options config_file_options = T_CONFIGURATION_OPTIONS_INITIALIZER;
|
||||
|
||||
/* conninfo params for the node we're operating on */
|
||||
t_conninfo_param_list source_conninfo;
|
||||
t_conninfo_param_list source_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
|
||||
bool config_file_required = true;
|
||||
char pg_bindir[MAXLEN] = "";
|
||||
@@ -95,7 +94,7 @@ static ItemList cli_warnings = {NULL, NULL};
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
t_conninfo_param_list default_conninfo;
|
||||
t_conninfo_param_list default_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
|
||||
int optindex;
|
||||
int c;
|
||||
@@ -329,6 +328,11 @@ main(int argc, char **argv)
|
||||
strncpy(runtime_options.node_name, optarg, MAXLEN);
|
||||
break;
|
||||
|
||||
/* --remote-node-id */
|
||||
case OPT_REMOTE_NODE_ID:
|
||||
runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, false);
|
||||
break;
|
||||
|
||||
/*
|
||||
* standby options * ---------------
|
||||
*/
|
||||
@@ -455,6 +459,10 @@ main(int argc, char **argv)
|
||||
runtime_options.has_passfile = true;
|
||||
break;
|
||||
|
||||
case OPT_REPL_CONN:
|
||||
runtime_options.replication_connection = true;
|
||||
break;
|
||||
|
||||
/*--------------------
|
||||
* "node rejoin" options
|
||||
*--------------------
|
||||
@@ -737,7 +745,6 @@ main(int argc, char **argv)
|
||||
|
||||
if (repmgr_command != NULL)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
if (strcasecmp(repmgr_command, "PRIMARY") == 0 || strcasecmp(repmgr_command, "MASTER") == 0)
|
||||
{
|
||||
if (help_option == true)
|
||||
@@ -794,9 +801,6 @@ main(int argc, char **argv)
|
||||
action = WITNESS_UNREGISTER;
|
||||
}
|
||||
else if (strcasecmp(repmgr_command, "BDR") == 0)
|
||||
#else
|
||||
if (strcasecmp(repmgr_command, "BDR") == 0)
|
||||
#endif
|
||||
{
|
||||
if (help_option == true)
|
||||
{
|
||||
@@ -1001,7 +1005,7 @@ main(int argc, char **argv)
|
||||
&& config_file_options.use_replication_slots == true)
|
||||
{
|
||||
log_error(_("STANDBY CLONE in Barman mode is incompatible with configuration option \"use_replication_slots\""));
|
||||
log_hint(_("set \"use_replication_slots\" to \"no\" in repmgr.conf, or use --without-barman fo clone directly from the upstream server"));
|
||||
log_hint(_("set \"use_replication_slots\" to \"no\" in repmgr.conf, or use --without-barman to clone directly from the upstream server"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
@@ -1157,7 +1161,6 @@ main(int argc, char **argv)
|
||||
|
||||
switch (action)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
/* PRIMARY */
|
||||
case PRIMARY_REGISTER:
|
||||
do_primary_register();
|
||||
@@ -1193,21 +1196,6 @@ main(int argc, char **argv)
|
||||
case WITNESS_UNREGISTER:
|
||||
do_witness_unregister();
|
||||
break;
|
||||
#else
|
||||
/* we won't ever reach here, but stop the compiler complaining */
|
||||
case PRIMARY_REGISTER:
|
||||
case PRIMARY_UNREGISTER:
|
||||
case STANDBY_CLONE:
|
||||
case STANDBY_REGISTER:
|
||||
case STANDBY_UNREGISTER:
|
||||
case STANDBY_PROMOTE:
|
||||
case STANDBY_FOLLOW:
|
||||
case STANDBY_SWITCHOVER:
|
||||
case WITNESS_REGISTER:
|
||||
case WITNESS_UNREGISTER:
|
||||
break;
|
||||
|
||||
#endif
|
||||
/* BDR */
|
||||
case BDR_REGISTER:
|
||||
do_bdr_register();
|
||||
@@ -1599,8 +1587,7 @@ check_cli_parameters(const int action)
|
||||
case NODE_STATUS:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(
|
||||
&cli_warnings,
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--is-shutdown-cleanly will be ignored when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
@@ -1613,8 +1600,7 @@ check_cli_parameters(const int action)
|
||||
case STANDBY_SWITCHOVER:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(
|
||||
&cli_warnings,
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--always-promote will be ignored when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
@@ -1628,8 +1614,7 @@ check_cli_parameters(const int action)
|
||||
case NODE_REJOIN:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(
|
||||
&cli_warnings,
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--force-rewind will be ignored when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
@@ -1643,8 +1628,7 @@ check_cli_parameters(const int action)
|
||||
case NODE_REJOIN:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(
|
||||
&cli_warnings,
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--config-files will be ignored when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
@@ -1658,6 +1642,7 @@ check_cli_parameters(const int action)
|
||||
case PRIMARY_UNREGISTER:
|
||||
case STANDBY_CLONE:
|
||||
case STANDBY_REGISTER:
|
||||
case STANDBY_FOLLOW:
|
||||
case STANDBY_SWITCHOVER:
|
||||
case WITNESS_REGISTER:
|
||||
case WITNESS_UNREGISTER:
|
||||
@@ -1665,8 +1650,7 @@ check_cli_parameters(const int action)
|
||||
case NODE_SERVICE:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(
|
||||
&cli_warnings,
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--dry-run is not effective when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
@@ -1688,8 +1672,7 @@ check_cli_parameters(const int action)
|
||||
if (used_options > 1)
|
||||
{
|
||||
/* TODO: list which options were used */
|
||||
item_list_append(
|
||||
&cli_errors,
|
||||
item_list_append(&cli_errors,
|
||||
"only one of --csv, --nagios and --optformat can be used");
|
||||
}
|
||||
}
|
||||
@@ -1793,10 +1776,8 @@ do_help(void)
|
||||
print_help_header();
|
||||
|
||||
printf(_("Usage:\n"));
|
||||
#ifndef BDR_ONLY
|
||||
printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
|
||||
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow}\n"), progname());
|
||||
#endif
|
||||
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
|
||||
printf(_(" %s [OPTIONS] node status\n"), progname());
|
||||
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
|
||||
@@ -2123,9 +2104,12 @@ test_ssh_connection(char *host, char *remote_user)
|
||||
bool
|
||||
local_command(const char *command, PQExpBufferData *outputbuf)
|
||||
{
|
||||
FILE *fp;
|
||||
FILE *fp = NULL;
|
||||
char output[MAXLEN];
|
||||
int retval = 0;
|
||||
bool success;
|
||||
|
||||
log_verbose(LOG_DEBUG, "executing:\n %s", command);
|
||||
|
||||
if (outputbuf == NULL)
|
||||
{
|
||||
@@ -2141,20 +2125,29 @@ local_command(const char *command, PQExpBufferData *outputbuf)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* TODO: better error handling */
|
||||
|
||||
while (fgets(output, MAXLEN, fp) != NULL)
|
||||
{
|
||||
appendPQExpBuffer(outputbuf, "%s", output);
|
||||
if (!feof(fp))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pclose(fp);
|
||||
retval = pclose(fp);
|
||||
|
||||
/* */
|
||||
success = (WEXITSTATUS(retval) == 0 || WEXITSTATUS(retval) == 141) ? true : false;
|
||||
|
||||
log_verbose(LOG_DEBUG, "result of command was %i (%i)", WEXITSTATUS(retval), retval);
|
||||
|
||||
if (outputbuf->data != NULL)
|
||||
log_verbose(LOG_DEBUG, "local_command(): output returned was:\n%s", outputbuf->data);
|
||||
else
|
||||
log_verbose(LOG_DEBUG, "local_command(): no output returned");
|
||||
|
||||
return true;
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
@@ -2416,7 +2409,12 @@ remote_command(const char *host, const char *user, const char *command, PQExpBuf
|
||||
pclose(fp);
|
||||
|
||||
if (outputbuf != NULL)
|
||||
log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n %s", outputbuf->data);
|
||||
{
|
||||
if (strlen(outputbuf->data))
|
||||
log_verbose(LOG_DEBUG, "remote_command(): output returned was:\n %s", outputbuf->data);
|
||||
else
|
||||
log_verbose(LOG_DEBUG, "remote_command(): no output returned");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -2462,18 +2460,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
{
|
||||
initPQExpBuffer(&command);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s %s -w -D ",
|
||||
make_pg_path("pg_ctl"),
|
||||
config_file_options.pg_ctl_options);
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
data_dir);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
" start");
|
||||
|
||||
strncpy(script, command.data, MAXLEN);
|
||||
@@ -2485,6 +2480,7 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
}
|
||||
|
||||
case ACTION_STOP:
|
||||
case ACTION_STOP_WAIT:
|
||||
{
|
||||
if (config_file_options.service_stop_command[0] != '\0')
|
||||
{
|
||||
@@ -2494,19 +2490,23 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
else
|
||||
{
|
||||
initPQExpBuffer(&command);
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s %s -D ",
|
||||
make_pg_path("pg_ctl"),
|
||||
config_file_options.pg_ctl_options);
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
data_dir);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
" -m fast -W stop");
|
||||
if (action == ACTION_STOP_WAIT)
|
||||
appendPQExpBuffer(&command,
|
||||
" -w");
|
||||
else
|
||||
appendPQExpBuffer(&command,
|
||||
" -W");
|
||||
|
||||
appendPQExpBuffer(&command,
|
||||
" -m fast stop");
|
||||
|
||||
strncpy(script, command.data, MAXLEN);
|
||||
|
||||
@@ -2525,18 +2525,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
else
|
||||
{
|
||||
initPQExpBuffer(&command);
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s %s -w -D ",
|
||||
make_pg_path("pg_ctl"),
|
||||
config_file_options.pg_ctl_options);
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
data_dir);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
" restart");
|
||||
|
||||
strncpy(script, command.data, MAXLEN);
|
||||
@@ -2556,18 +2553,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
else
|
||||
{
|
||||
initPQExpBuffer(&command);
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s %s -w -D ",
|
||||
make_pg_path("pg_ctl"),
|
||||
config_file_options.pg_ctl_options);
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
data_dir);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
" reload");
|
||||
|
||||
strncpy(script, command.data, MAXLEN);
|
||||
@@ -2588,18 +2582,15 @@ get_server_action(t_server_action action, char *script, char *data_dir)
|
||||
else
|
||||
{
|
||||
initPQExpBuffer(&command);
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
"%s %s -w -D ",
|
||||
make_pg_path("pg_ctl"),
|
||||
config_file_options.pg_ctl_options);
|
||||
|
||||
appendShellString(
|
||||
&command,
|
||||
appendShellString(&command,
|
||||
data_dir);
|
||||
|
||||
appendPQExpBuffer(
|
||||
&command,
|
||||
appendPQExpBuffer(&command,
|
||||
" promote");
|
||||
|
||||
strncpy(script, command.data, MAXLEN);
|
||||
@@ -2633,6 +2624,7 @@ data_dir_required_for_action(t_server_action action)
|
||||
return true;
|
||||
|
||||
case ACTION_STOP:
|
||||
case ACTION_STOP_WAIT:
|
||||
if (config_file_options.service_stop_command[0] != '\0')
|
||||
{
|
||||
return false;
|
||||
|
||||
@@ -83,6 +83,8 @@
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 1034
|
||||
#define OPT_HAS_PASSFILE 1035
|
||||
#define OPT_WAIT_START 1036
|
||||
#define OPT_REPL_CONN 1037
|
||||
#define OPT_REMOTE_NODE_ID 1038
|
||||
|
||||
/* deprecated since 3.3 */
|
||||
#define OPT_DATA_DIR 999
|
||||
@@ -115,6 +117,7 @@ static struct option long_options[] =
|
||||
{"pgdata", required_argument, NULL, 'D'},
|
||||
{"node-id", required_argument, NULL, OPT_NODE_ID},
|
||||
{"node-name", required_argument, NULL, OPT_NODE_NAME},
|
||||
{"remote-node-id", required_argument, NULL, OPT_REMOTE_NODE_ID},
|
||||
|
||||
/* logging options */
|
||||
{"log-level", required_argument, NULL, 'L'},
|
||||
@@ -158,6 +161,7 @@ static struct option long_options[] =
|
||||
{"role", no_argument, NULL, OPT_ROLE},
|
||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||
|
||||
/* "node rejoin" options */
|
||||
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
||||
|
||||
8
repmgr.c
8
repmgr.c
@@ -288,7 +288,6 @@ standby_get_last_updated(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
notify_follow_primary(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
int primary_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (!shared_state)
|
||||
@@ -316,7 +315,7 @@ notify_follow_primary(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
#endif
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
@@ -329,14 +328,12 @@ get_new_primary(PG_FUNCTION_ARGS)
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
#ifndef BDR_ONLY
|
||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
||||
|
||||
if (shared_state->follow_new_primary == true)
|
||||
new_primary_node_id = shared_state->candidate_node_id;
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
#endif
|
||||
|
||||
if (new_primary_node_id == UNKNOWN_NODE_ID)
|
||||
PG_RETURN_NULL();
|
||||
@@ -348,7 +345,6 @@ get_new_primary(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
reset_voting_status(PG_FUNCTION_ARGS)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
if (!shared_state)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
@@ -366,7 +362,7 @@ reset_voting_status(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
#endif
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
|
||||
@@ -235,8 +235,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby
|
||||
# will wait for a notification from the new primary,
|
||||
# before falling back to degraded monitoring
|
||||
#monitoring_history=no
|
||||
|
||||
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
||||
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
||||
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
||||
# server being monitored is no longer available. -1 (default)
|
||||
# disables the timeout completely.
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.0.2"
|
||||
#define REPMGR_VERSION "4.0.3"
|
||||
|
||||
|
||||
@@ -54,7 +54,6 @@ typedef enum
|
||||
static PGconn *upstream_conn = NULL;
|
||||
static PGconn *primary_conn = NULL;
|
||||
|
||||
#ifndef BDR_ONLY
|
||||
static FailoverState failover_state = FAILOVER_STATE_UNKNOWN;
|
||||
|
||||
static int primary_node_id = UNKNOWN_NODE_ID;
|
||||
@@ -85,15 +84,12 @@ static void update_monitoring_history(void);
|
||||
|
||||
static const char * format_failover_state(FailoverState failover_state);
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* perform some sanity checks on the node's configuration */
|
||||
|
||||
void
|
||||
do_physical_node_check(void)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
/*
|
||||
* Check if node record is active - if not, and `failover=automatic`, the
|
||||
* node won't be considered as a promotion candidate; this often happens
|
||||
@@ -163,7 +159,6 @@ do_physical_node_check(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -174,7 +169,6 @@ do_physical_node_check(void)
|
||||
void
|
||||
monitor_streaming_primary(void)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
instr_time log_status_interval_start;
|
||||
PQExpBufferData event_details;
|
||||
|
||||
@@ -485,14 +479,12 @@ loop:
|
||||
|
||||
sleep(config_file_options.monitor_interval_secs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
monitor_streaming_standby(void)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
RecordStatus record_status;
|
||||
instr_time log_status_interval_start;
|
||||
PQExpBufferData event_details;
|
||||
@@ -1019,14 +1011,12 @@ loop:
|
||||
|
||||
sleep(config_file_options.monitor_interval_secs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
monitor_streaming_witness(void)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
instr_time log_status_interval_start;
|
||||
instr_time witness_sync_interval_start;
|
||||
|
||||
@@ -1351,13 +1341,12 @@ loop:
|
||||
|
||||
sleep(config_file_options.monitor_interval_secs);
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
|
||||
#ifndef BDR_ONLY
|
||||
static bool
|
||||
do_primary_failover(void)
|
||||
{
|
||||
@@ -2722,7 +2711,6 @@ format_failover_state(FailoverState failover_state)
|
||||
return "UNKNOWN_FAILOVER_STATE";
|
||||
}
|
||||
|
||||
#endif /* #ifndef BDR_ONLY */
|
||||
|
||||
void
|
||||
close_connections_physical()
|
||||
|
||||
39
repmgrd.c
39
repmgrd.c
@@ -89,6 +89,7 @@ main(int argc, char **argv)
|
||||
bool cli_monitoring_history = false;
|
||||
|
||||
RecordStatus record_status;
|
||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||
|
||||
FILE *fd;
|
||||
|
||||
@@ -318,6 +319,37 @@ main(int argc, char **argv)
|
||||
* repmgr has not been properly configured.
|
||||
*/
|
||||
|
||||
/* Check that the "repmgr" extension is installed */
|
||||
extension_status = get_repmgr_extension_status(local_conn);
|
||||
|
||||
if (extension_status != REPMGR_INSTALLED)
|
||||
{
|
||||
/* this is unlikely to happen */
|
||||
if (extension_status == REPMGR_UNKNOWN)
|
||||
{
|
||||
log_error(_("unable to determine status of \"repmgr\" extension"));
|
||||
log_detail("%s", PQerrorMessage(local_conn));
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_DB_QUERY);
|
||||
}
|
||||
|
||||
log_error(_("repmgr extension not found on this node"));
|
||||
|
||||
if (extension_status == REPMGR_AVAILABLE)
|
||||
{
|
||||
log_detail(_("repmgr extension is available but not installed in database \"%s\""),
|
||||
PQdb(local_conn));
|
||||
}
|
||||
else if (extension_status == REPMGR_UNAVAILABLE)
|
||||
{
|
||||
log_detail(_("repmgr extension is not available on this node"));
|
||||
}
|
||||
|
||||
log_hint(_("check that this node is part of a repmgr cluster"));
|
||||
PQfinish(local_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Retrieve record for this node from the local database */
|
||||
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info);
|
||||
|
||||
@@ -400,7 +432,6 @@ start_monitoring(void)
|
||||
{
|
||||
switch (local_node_info.type)
|
||||
{
|
||||
#ifndef BDR_ONLY
|
||||
case PRIMARY:
|
||||
monitor_streaming_primary();
|
||||
break;
|
||||
@@ -409,12 +440,6 @@ start_monitoring(void)
|
||||
break;
|
||||
case WITNESS:
|
||||
monitor_streaming_witness();
|
||||
break;
|
||||
#else
|
||||
case PRIMARY:
|
||||
case STANDBY:
|
||||
return;
|
||||
#endif
|
||||
case BDR:
|
||||
monitor_bdr();
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user