mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
38 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b17993abdb | ||
|
|
8f68344f9a | ||
|
|
125ac6c297 | ||
|
|
955860923f | ||
|
|
50626f90cc | ||
|
|
9aea5b8aa7 | ||
|
|
ed1bcb159e | ||
|
|
9c72c0d66e | ||
|
|
0ddc226c2a | ||
|
|
93830cad61 | ||
|
|
bca1660d5e | ||
|
|
5a52917421 | ||
|
|
70752d7d4a | ||
|
|
c29d1efc37 | ||
|
|
6fbbe2a97a | ||
|
|
ce42d6827e | ||
|
|
98384559a6 | ||
|
|
4a1477343b | ||
|
|
d2b9d20393 | ||
|
|
fe594c95ad | ||
|
|
60e63feaca | ||
|
|
ae4d0f2622 | ||
|
|
5e8b41e221 | ||
|
|
c7a585c555 | ||
|
|
a27dd8c49c | ||
|
|
9365bf3474 | ||
|
|
e8ae0831fe | ||
|
|
518866eba5 | ||
|
|
ed0330c334 | ||
|
|
1f021dc9fa | ||
|
|
425839d764 | ||
|
|
3a764f678a | ||
|
|
829cf5cca4 | ||
|
|
14420d83fa | ||
|
|
a80e22f0ed | ||
|
|
832993bfbc | ||
|
|
f1ea5e62df | ||
|
|
b47448d0e5 |
19
HISTORY
19
HISTORY
@@ -1,4 +1,20 @@
|
|||||||
4.0.3 2018-02-
|
4.0.4 2018-03-08
|
||||||
|
repmgr: add "standby clone --recovery-conf-only" option; GitHub #382 (Ian)
|
||||||
|
repmgr: make "standby promote" timeout values configurable; GitHub #387 (Ian)
|
||||||
|
repmgr: improve replication slot warnings generated by "node status";
|
||||||
|
GitHub #385 (Ian)
|
||||||
|
repmgr: remove restriction on replication slots when cloning from
|
||||||
|
a Barman server; GitHub #379 (Ian)
|
||||||
|
repmgr: ensure "node rejoin" honours "--dry-run" option; GitHub #383 (Ian)
|
||||||
|
repmgr: fix --superuser handling when cloning a standby; GitHub #380 (Ian)
|
||||||
|
repmgr: update various help options; GitHub #391, #392 (hasegeli)
|
||||||
|
repmgrd: add event "repmgrd_shutdown"; GitHub #393 (Ian)
|
||||||
|
repmgrd: improve detection of status change from primary to standby (Ian)
|
||||||
|
repmgrd: improve log output in various situations (Ian)
|
||||||
|
repmgrd: improve reconnection to the local node after a failover (Ian)
|
||||||
|
repmgrd: ensure witness server connects to new primary after a failover (Ian)
|
||||||
|
|
||||||
|
4.0.3 2018-02-15
|
||||||
repmgr: improve switchover handling when "pg_ctl" used to control the
|
repmgr: improve switchover handling when "pg_ctl" used to control the
|
||||||
server and logging output is not explicitly redirected (Ian)
|
server and logging output is not explicitly redirected (Ian)
|
||||||
repmgr: improve switchover log messages and exit code when old primary could
|
repmgr: improve switchover log messages and exit code when old primary could
|
||||||
@@ -17,6 +33,7 @@
|
|||||||
repmgr: fix upstream node display in "repmgr node status"; GitHub #363 (fanf2)
|
repmgr: fix upstream node display in "repmgr node status"; GitHub #363 (fanf2)
|
||||||
repmgr: improve/clarify documentation and update --help output for
|
repmgr: improve/clarify documentation and update --help output for
|
||||||
"primary unregister"; GitHub #373 (Ian)
|
"primary unregister"; GitHub #373 (Ian)
|
||||||
|
repmgr: allow replication slots when Barman is configured; GitHub #379 (Ian)
|
||||||
repmgr: fix parsing of "pg_basebackup_options"; GitHub #376 (Ian)
|
repmgr: fix parsing of "pg_basebackup_options"; GitHub #376 (Ian)
|
||||||
repmgr: ensure "pg_subtrans" directory is created when cloning a standby in
|
repmgr: ensure "pg_subtrans" directory is created when cloning a standby in
|
||||||
Barman mode (Ian)
|
Barman mode (Ian)
|
||||||
|
|||||||
43
configfile.c
43
configfile.c
@@ -303,7 +303,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL;
|
options->log_status_interval = DEFAULT_LOG_STATUS_INTERVAL;
|
||||||
|
|
||||||
/*-----------------------
|
/*-----------------------
|
||||||
* standby action settings
|
* standby clone settings
|
||||||
*------------------------
|
*------------------------
|
||||||
*/
|
*/
|
||||||
options->use_replication_slots = false;
|
options->use_replication_slots = false;
|
||||||
@@ -317,6 +317,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->use_primary_conninfo_password = false;
|
options->use_primary_conninfo_password = false;
|
||||||
memset(options->passfile, 0, sizeof(options->passfile));
|
memset(options->passfile, 0, sizeof(options->passfile));
|
||||||
|
|
||||||
|
/*-----------------------
|
||||||
|
* standby promote settings
|
||||||
|
*------------------------
|
||||||
|
*/
|
||||||
|
options->promote_check_timeout = DEFAULT_PROMOTE_CHECK_TIMEOUT;
|
||||||
|
options->promote_check_interval = DEFAULT_PROMOTE_CHECK_INTERVAL;
|
||||||
|
|
||||||
/*-----------------
|
/*-----------------
|
||||||
* repmgrd settings
|
* repmgrd settings
|
||||||
*-----------------
|
*-----------------
|
||||||
@@ -337,6 +344,7 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
|
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
|
||||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||||
|
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||||
|
|
||||||
/*-------------
|
/*-------------
|
||||||
* witness settings
|
* witness settings
|
||||||
@@ -505,6 +513,13 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
else if (strcmp(name, "passfile") == 0)
|
else if (strcmp(name, "passfile") == 0)
|
||||||
strncpy(options->passfile, value, sizeof(options->passfile));
|
strncpy(options->passfile, value, sizeof(options->passfile));
|
||||||
|
|
||||||
|
/* standby promote settings */
|
||||||
|
else if (strcmp(name, "promote_check_timeout") == 0)
|
||||||
|
options->promote_check_timeout = repmgr_atoi(value, name, error_list, 1);
|
||||||
|
|
||||||
|
else if (strcmp(name, "promote_check_interval") == 0)
|
||||||
|
options->promote_check_interval = repmgr_atoi(value, name, error_list, 1);
|
||||||
|
|
||||||
/* node check settings */
|
/* node check settings */
|
||||||
else if (strcmp(name, "archive_ready_warning") == 0)
|
else if (strcmp(name, "archive_ready_warning") == 0)
|
||||||
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
|
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
|
||||||
@@ -556,6 +571,8 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||||
|
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||||
|
|
||||||
/* witness settings */
|
/* witness settings */
|
||||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||||
@@ -1028,7 +1045,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strcmp(new_options.node_name, orig_options->node_name) != 0)
|
if (strncmp(new_options.node_name, orig_options->node_name, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
log_warning(_("\"node_name\" cannot be changed, keeping current configuration"));
|
||||||
return false;
|
return false;
|
||||||
@@ -1072,7 +1089,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* conninfo */
|
/* conninfo */
|
||||||
if (strcmp(orig_options->conninfo, new_options.conninfo) != 0)
|
if (strncmp(orig_options->conninfo, new_options.conninfo, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
/* Test conninfo string works */
|
/* Test conninfo string works */
|
||||||
conn = establish_db_connection(new_options.conninfo, false);
|
conn = establish_db_connection(new_options.conninfo, false);
|
||||||
@@ -1099,7 +1116,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* event_notification_command */
|
/* event_notification_command */
|
||||||
if (strcmp(orig_options->event_notification_command, new_options.event_notification_command) != 0)
|
if (strncmp(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strncpy(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN);
|
strncpy(orig_options->event_notification_command, new_options.event_notification_command, MAXLEN);
|
||||||
log_info(_("\"event_notification_command\" is now \"%s\""), new_options.event_notification_command);
|
log_info(_("\"event_notification_command\" is now \"%s\""), new_options.event_notification_command);
|
||||||
@@ -1108,7 +1125,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* event_notifications */
|
/* event_notifications */
|
||||||
if (strcmp(orig_options->event_notifications_orig, new_options.event_notifications_orig) != 0)
|
if (strncmp(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strncpy(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN);
|
strncpy(orig_options->event_notifications_orig, new_options.event_notifications_orig, MAXLEN);
|
||||||
log_info(_("\"event_notifications\" is now \"%s\""), new_options.event_notifications_orig);
|
log_info(_("\"event_notifications\" is now \"%s\""), new_options.event_notifications_orig);
|
||||||
@@ -1128,7 +1145,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* follow_command */
|
/* follow_command */
|
||||||
if (strcmp(orig_options->follow_command, new_options.follow_command) != 0)
|
if (strncmp(orig_options->follow_command, new_options.follow_command, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strncpy(orig_options->follow_command, new_options.follow_command, MAXLEN);
|
strncpy(orig_options->follow_command, new_options.follow_command, MAXLEN);
|
||||||
log_info(_("\"follow_command\" is now \"%s\""), new_options.follow_command);
|
log_info(_("\"follow_command\" is now \"%s\""), new_options.follow_command);
|
||||||
@@ -1165,7 +1182,7 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
|
|
||||||
|
|
||||||
/* promote_command */
|
/* promote_command */
|
||||||
if (strcmp(orig_options->promote_command, new_options.promote_command) != 0)
|
if (strncmp(orig_options->promote_command, new_options.promote_command, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strncpy(orig_options->promote_command, new_options.promote_command, MAXLEN);
|
strncpy(orig_options->promote_command, new_options.promote_command, MAXLEN);
|
||||||
log_info(_("\"promote_command\" is now \"%s\""), new_options.promote_command);
|
log_info(_("\"promote_command\" is now \"%s\""), new_options.promote_command);
|
||||||
@@ -1205,18 +1222,18 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* log_facility */
|
/* log_facility */
|
||||||
if (strcmp(orig_options->log_facility, new_options.log_facility) != 0)
|
if (strncmp(orig_options->log_facility, new_options.log_facility, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strcpy(orig_options->log_facility, new_options.log_facility);
|
strncpy(orig_options->log_facility, new_options.log_facility, MAXLEN);
|
||||||
log_info(_("\"log_facility\" is now \"%s\""), new_options.log_facility);
|
log_info(_("\"log_facility\" is now \"%s\""), new_options.log_facility);
|
||||||
|
|
||||||
log_config_changed = true;
|
log_config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* log_file */
|
/* log_file */
|
||||||
if (strcmp(orig_options->log_file, new_options.log_file) != 0)
|
if (strncmp(orig_options->log_file, new_options.log_file, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strcpy(orig_options->log_file, new_options.log_file);
|
strncpy(orig_options->log_file, new_options.log_file, MAXLEN);
|
||||||
log_info(_("\"log_file\" is now \"%s\""), new_options.log_file);
|
log_info(_("\"log_file\" is now \"%s\""), new_options.log_file);
|
||||||
|
|
||||||
log_config_changed = true;
|
log_config_changed = true;
|
||||||
@@ -1224,9 +1241,9 @@ reload_config(t_configuration_options *orig_options)
|
|||||||
|
|
||||||
|
|
||||||
/* log_level */
|
/* log_level */
|
||||||
if (strcmp(orig_options->log_level, new_options.log_level) != 0)
|
if (strncmp(orig_options->log_level, new_options.log_level, MAXLEN) != 0)
|
||||||
{
|
{
|
||||||
strcpy(orig_options->log_level, new_options.log_level);
|
strncpy(orig_options->log_level, new_options.log_level, MAXLEN);
|
||||||
log_info(_("\"log_level\" is now \"%s\""), new_options.log_level);
|
log_info(_("\"log_level\" is now \"%s\""), new_options.log_level);
|
||||||
|
|
||||||
log_config_changed = true;
|
log_config_changed = true;
|
||||||
|
|||||||
10
configfile.h
10
configfile.h
@@ -82,7 +82,7 @@ typedef struct
|
|||||||
char log_file[MAXLEN];
|
char log_file[MAXLEN];
|
||||||
int log_status_interval;
|
int log_status_interval;
|
||||||
|
|
||||||
/* standby action settings */
|
/* standby clone settings */
|
||||||
bool use_replication_slots;
|
bool use_replication_slots;
|
||||||
char pg_basebackup_options[MAXLEN];
|
char pg_basebackup_options[MAXLEN];
|
||||||
char restore_command[MAXLEN];
|
char restore_command[MAXLEN];
|
||||||
@@ -92,6 +92,10 @@ typedef struct
|
|||||||
bool use_primary_conninfo_password;
|
bool use_primary_conninfo_password;
|
||||||
char passfile[MAXPGPATH];
|
char passfile[MAXPGPATH];
|
||||||
|
|
||||||
|
/* standby promote settings */
|
||||||
|
int promote_check_timeout;
|
||||||
|
int promote_check_interval;
|
||||||
|
|
||||||
/* node check settings */
|
/* node check settings */
|
||||||
int archive_ready_warning;
|
int archive_ready_warning;
|
||||||
int archive_ready_critical;
|
int archive_ready_critical;
|
||||||
@@ -115,6 +119,7 @@ typedef struct
|
|||||||
int async_query_timeout;
|
int async_query_timeout;
|
||||||
int primary_notification_timeout;
|
int primary_notification_timeout;
|
||||||
int primary_follow_timeout;
|
int primary_follow_timeout;
|
||||||
|
int standby_reconnect_timeout;
|
||||||
|
|
||||||
/* BDR settings */
|
/* BDR settings */
|
||||||
bool bdr_local_monitoring_only;
|
bool bdr_local_monitoring_only;
|
||||||
@@ -158,6 +163,8 @@ typedef struct
|
|||||||
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
"", "", "", DEFAULT_LOG_STATUS_INTERVAL, \
|
||||||
/* standby action settings */ \
|
/* standby action settings */ \
|
||||||
false, "", "", { NULL, NULL }, "", false, false, "", \
|
false, "", "", { NULL, NULL }, "", false, false, "", \
|
||||||
|
/* standby promote settings */ \
|
||||||
|
DEFAULT_PROMOTE_CHECK_TIMEOUT, DEFAULT_PROMOTE_CHECK_INTERVAL, \
|
||||||
/* node check settings */ \
|
/* node check settings */ \
|
||||||
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
||||||
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
||||||
@@ -172,6 +179,7 @@ typedef struct
|
|||||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||||
|
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||||
/* BDR settings */ \
|
/* BDR settings */ \
|
||||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||||
/* service settings */ \
|
/* service settings */ \
|
||||||
|
|||||||
18
configure
vendored
18
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for repmgr 4.0.3.
|
# Generated by GNU Autoconf 2.69 for repmgr 4.0.4.
|
||||||
#
|
#
|
||||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
# Report bugs to <pgsql-bugs@postgresql.org>.
|
||||||
#
|
#
|
||||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='repmgr'
|
PACKAGE_NAME='repmgr'
|
||||||
PACKAGE_TARNAME='repmgr'
|
PACKAGE_TARNAME='repmgr'
|
||||||
PACKAGE_VERSION='4.0.3'
|
PACKAGE_VERSION='4.0.4'
|
||||||
PACKAGE_STRING='repmgr 4.0.3'
|
PACKAGE_STRING='repmgr 4.0.4'
|
||||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
||||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
||||||
|
|
||||||
@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures repmgr 4.0.3 to adapt to many kinds of systems.
|
\`configure' configures repmgr 4.0.4 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1239,7 +1239,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of repmgr 4.0.3:";;
|
short | recursive ) echo "Configuration of repmgr 4.0.4:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1313,7 +1313,7 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
repmgr configure 4.0.3
|
repmgr configure 4.0.4
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by repmgr $as_me 4.0.3, which was
|
It was created by repmgr $as_me 4.0.4, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by repmgr $as_me 4.0.3, which was
|
This file was extended by repmgr $as_me 4.0.4, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -2422,7 +2422,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
repmgr config.status 4.0.3
|
repmgr config.status 4.0.4
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
AC_INIT([repmgr], [4.0.3], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
AC_INIT([repmgr], [4.0.4], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||||
|
|
||||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||||
|
|
||||||
|
|||||||
118
dbutils.c
118
dbutils.c
@@ -219,8 +219,7 @@ establish_db_connection_quiet(const char *conninfo)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PGconn
|
PGconn *
|
||||||
*
|
|
||||||
establish_primary_db_connection(PGconn *conn,
|
establish_primary_db_connection(PGconn *conn,
|
||||||
const bool exit_on_error)
|
const bool exit_on_error)
|
||||||
{
|
{
|
||||||
@@ -237,36 +236,6 @@ establish_primary_db_connection(PGconn *conn,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PGconn *
|
|
||||||
establish_db_connection_as_user(const char *conninfo,
|
|
||||||
const char *user,
|
|
||||||
const bool exit_on_error)
|
|
||||||
{
|
|
||||||
PGconn *conn = NULL;
|
|
||||||
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
||||||
bool parse_success = false;
|
|
||||||
char *errmsg = NULL;
|
|
||||||
|
|
||||||
initialize_conninfo_params(&conninfo_params, false);
|
|
||||||
|
|
||||||
parse_success = parse_conninfo_string(conninfo, &conninfo_params, errmsg, true);
|
|
||||||
|
|
||||||
if (parse_success == false)
|
|
||||||
{
|
|
||||||
log_error(_("unable to pass provided conninfo string:\n %s"), errmsg);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
param_set(&conninfo_params, "user", user);
|
|
||||||
|
|
||||||
conn = establish_db_connection_by_params(&conninfo_params, false);
|
|
||||||
|
|
||||||
return conn;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PGconn *
|
PGconn *
|
||||||
establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||||
const bool exit_on_error)
|
const bool exit_on_error)
|
||||||
@@ -1060,7 +1029,7 @@ get_server_version(PGconn *conn, char *server_version)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (server_version != NULL)
|
if (server_version != NULL)
|
||||||
strcpy(server_version, PQgetvalue(res, 0, 1));
|
strncpy(server_version, PQgetvalue(res, 0, 1), MAXVERSIONSTR);
|
||||||
|
|
||||||
server_version_num = atoi(PQgetvalue(res, 0, 0));
|
server_version_num = atoi(PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
@@ -1803,7 +1772,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
|||||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXLEN);
|
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXLEN);
|
||||||
|
|
||||||
/* This won't normally be set */
|
/* This won't normally be set */
|
||||||
strncpy(node_info->upstream_node_name, PQgetvalue(res, row, 10), MAXLEN);
|
strncpy(node_info->upstream_node_name, PQgetvalue(res, row, 11), MAXLEN);
|
||||||
|
|
||||||
/* Set remaining struct fields with default values */
|
/* Set remaining struct fields with default values */
|
||||||
node_info->node_status = NODE_STATUS_UNKNOWN;
|
node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||||
@@ -1887,6 +1856,36 @@ get_node_record(PGconn *conn, int node_id, t_node_info *node_info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
RecordStatus
|
||||||
|
get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
RecordStatus result;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||||
|
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name "
|
||||||
|
" FROM repmgr.nodes n "
|
||||||
|
" LEFT JOIN repmgr.nodes un "
|
||||||
|
" ON un.node_id = n.upstream_node_id"
|
||||||
|
" WHERE n.node_id = %i",
|
||||||
|
node_id);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "get_node_record():\n %s", query.data);
|
||||||
|
|
||||||
|
result = _get_node_record(conn, query.data, node_info);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (result == RECORD_NOT_FOUND)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_DEBUG, "get_node_record(): no record found for node %i", node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
RecordStatus
|
RecordStatus
|
||||||
get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info)
|
get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info)
|
||||||
{
|
{
|
||||||
@@ -2140,7 +2139,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
|||||||
|
|
||||||
appendPQExpBuffer(&query,
|
appendPQExpBuffer(&query,
|
||||||
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
" SELECT n.node_id, n.type, n.upstream_node_id, n.node_name, n.conninfo, n.repluser, "
|
||||||
" n.slot_name, n.location, n.priority, n.active, un.node_name AS upstream_node_name "
|
" n.slot_name, n.location, n.priority, n.active, n.config_file, un.node_name AS upstream_node_name "
|
||||||
" FROM repmgr.nodes n "
|
" FROM repmgr.nodes n "
|
||||||
" LEFT JOIN repmgr.nodes un "
|
" LEFT JOIN repmgr.nodes un "
|
||||||
" ON un.node_id = n.upstream_node_id"
|
" ON un.node_id = n.upstream_node_id"
|
||||||
@@ -2170,7 +2169,7 @@ get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list)
|
|||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *node_list)
|
get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *node_list)
|
||||||
{
|
{
|
||||||
PQExpBufferData query;
|
PQExpBufferData query;
|
||||||
PGresult *res = NULL;
|
PGresult *res = NULL;
|
||||||
@@ -2181,9 +2180,10 @@ get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoLi
|
|||||||
" SELECT " REPMGR_NODES_COLUMNS
|
" SELECT " REPMGR_NODES_COLUMNS
|
||||||
" FROM repmgr.nodes n "
|
" FROM repmgr.nodes n "
|
||||||
"LEFT JOIN pg_catalog.pg_replication_slots rs "
|
"LEFT JOIN pg_catalog.pg_replication_slots rs "
|
||||||
" ON rs.slot_name = n.node_name "
|
" ON rs.slot_name = n.slot_name "
|
||||||
" WHERE rs.slot_name IS NULL "
|
" WHERE n.slot_name IS NOT NULL"
|
||||||
" AND n.node_id != %i ",
|
" AND rs.slot_name IS NULL "
|
||||||
|
" AND n.upstream_node_id = %i ",
|
||||||
this_node_id);
|
this_node_id);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "get_all_node_records_with_missing_slot():\n%s", query.data);
|
log_verbose(LOG_DEBUG, "get_all_node_records_with_missing_slot():\n%s", query.data);
|
||||||
@@ -2347,8 +2347,7 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)
|
|||||||
|
|
||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
appendPQExpBuffer(
|
appendPQExpBuffer(&query,
|
||||||
&query,
|
|
||||||
"UPDATE repmgr.nodes SET active = %s "
|
"UPDATE repmgr.nodes SET active = %s "
|
||||||
" WHERE node_id = %i",
|
" WHERE node_id = %i",
|
||||||
active == true ? "TRUE" : "FALSE",
|
active == true ? "TRUE" : "FALSE",
|
||||||
@@ -2373,6 +2372,40 @@ update_node_record_set_active(PGconn *conn, int this_node_id, bool active)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
update_node_record_set_active_standby(PGconn *conn, int this_node_id)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
PGresult *res = NULL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
"UPDATE repmgr.nodes "
|
||||||
|
" SET type = 'standby', "
|
||||||
|
" active = TRUE "
|
||||||
|
" WHERE node_id = %i",
|
||||||
|
this_node_id);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "update_node_record_set_active_standby():\n %s", query.data);
|
||||||
|
|
||||||
|
res = PQexec(conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to update node record:\n %s"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
update_node_record_set_primary(PGconn *conn, int this_node_id)
|
update_node_record_set_primary(PGconn *conn, int this_node_id)
|
||||||
{
|
{
|
||||||
@@ -3461,6 +3494,9 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
|||||||
PGresult *res = NULL;
|
PGresult *res = NULL;
|
||||||
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||||
|
|
||||||
|
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||||
|
server_version_num = get_server_version(conn, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether slot exists already; if it exists and is active, that
|
* Check whether slot exists already; if it exists and is active, that
|
||||||
* means another active standby is using it, which creates an error
|
* means another active standby is using it, which creates an error
|
||||||
|
|||||||
@@ -343,9 +343,6 @@ bool atobool(const char *value);
|
|||||||
PGconn *establish_db_connection(const char *conninfo,
|
PGconn *establish_db_connection(const char *conninfo,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||||
PGconn *establish_db_connection_as_user(const char *conninfo,
|
|
||||||
const char *user,
|
|
||||||
const bool exit_on_error);
|
|
||||||
|
|
||||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
@@ -408,6 +405,8 @@ t_server_type parse_node_type(const char *type);
|
|||||||
const char *get_node_type_string(t_server_type type);
|
const char *get_node_type_string(t_server_type type);
|
||||||
|
|
||||||
RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
RecordStatus get_node_record(PGconn *conn, int node_id, t_node_info *node_info);
|
||||||
|
RecordStatus get_node_record_with_upstream(PGconn *conn, int node_id, t_node_info *node_info);
|
||||||
|
|
||||||
RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
|
RecordStatus get_node_record_by_name(PGconn *conn, const char *node_name, t_node_info *node_info);
|
||||||
t_node_info *get_node_record_pointer(PGconn *conn, int node_id);
|
t_node_info *get_node_record_pointer(PGconn *conn, int node_id);
|
||||||
|
|
||||||
@@ -419,7 +418,7 @@ void get_downstream_node_records(PGconn *conn, int node_id, NodeInfoList *nodes
|
|||||||
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
void get_active_sibling_node_records(PGconn *conn, int node_id, int upstream_node_id, NodeInfoList *node_list);
|
||||||
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
void get_node_records_by_priority(PGconn *conn, NodeInfoList *node_list);
|
||||||
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
bool get_all_node_records_with_upstream(PGconn *conn, NodeInfoList *node_list);
|
||||||
bool get_downsteam_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
bool get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoList *noede_list);
|
||||||
|
|
||||||
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
bool create_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||||
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_info);
|
||||||
@@ -428,6 +427,7 @@ bool truncate_node_records(PGconn *conn);
|
|||||||
|
|
||||||
bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
|
bool update_node_record_set_active(PGconn *conn, int this_node_id, bool active);
|
||||||
bool update_node_record_set_primary(PGconn *conn, int this_node_id);
|
bool update_node_record_set_primary(PGconn *conn, int this_node_id);
|
||||||
|
bool update_node_record_set_active_standby(PGconn *conn, int this_node_id);
|
||||||
bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||||
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
|
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
|
bool update_node_record_conn_priority(PGconn *conn, t_configuration_options *options);
|
||||||
|
|||||||
@@ -69,12 +69,35 @@
|
|||||||
in a streaming replication cluster.
|
in a streaming replication cluster.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="faq-upgrades" xreflabel="Upgrading PostgreSQL with repmgr">
|
||||||
|
<title>Can &repmgr; assist with upgrading a PostgreSQL cluster?</title>
|
||||||
|
<para>
|
||||||
|
For <emphasis>minor</emphasis> version upgrades, e.g. from 9.6.7 to 9.6.8, a common
|
||||||
|
approach is to upgrade a standby to the latest version, perform a
|
||||||
|
<link linkend="performing-switchover">switchover</link> promoting it to a primary,
|
||||||
|
then upgrade the former primary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For <emphasis>major</emphasis> version upgrades (e.g. from PostgreSQL 9.6 to PostgreSQL 10),
|
||||||
|
the traditional approach is to "reseed" a cluster by upgrading a single
|
||||||
|
node with <ulink url="https://www.postgresql.org/docs/current/static/pgupgrade.html">pg_upgrade</ulink>
|
||||||
|
and recloning standbys from this.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
To minimize downtime during major upgrades, for more recent PostgreSQL
|
||||||
|
versions <ulink url="https://www.2ndquadrant.com/en/resources/pglogical/">pglogical</ulink>
|
||||||
|
can be used to set up a parallel cluster using the newer PostgreSQL version,
|
||||||
|
which can be kept in sync with the existing production cluster until the
|
||||||
|
new cluster is ready to be put into production.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
<sect1 id="faq-repmgr" xreflabel="repmgr">
|
||||||
<title><command>repmgr</command></title>
|
<title><command>repmgr</command></title>
|
||||||
|
|
||||||
<sect2 id="faq-register-existing-node" xreflabel="">
|
<sect2 id="faq-register-existing-node" xreflabel="registering an existing node">
|
||||||
<title>Can I register an existing PostgreSQL server with repmgr?</title>
|
<title>Can I register an existing PostgreSQL server with repmgr?</title>
|
||||||
<para>
|
<para>
|
||||||
Yes, any existing PostgreSQL server which is part of the same replication
|
Yes, any existing PostgreSQL server which is part of the same replication
|
||||||
@@ -82,6 +105,18 @@
|
|||||||
standby to have been cloned using &repmgr;.
|
standby to have been cloned using &repmgr;.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
<sect2 id="faq-repmgr-clone-other-source" >
|
||||||
|
<title>Can I use a standby not cloned by &repmgr; as a &repmgr; node?</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
For a standby which has been manually cloned or recovered from an external
|
||||||
|
backup manager such as Barman, the command
|
||||||
|
<command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
|
||||||
|
can be used to create the correct <filename>recovery.conf</filename> file for
|
||||||
|
use with &repmgr; (and will create a replication slot if required). Once this has been done,
|
||||||
|
<link linkend="repmgr-standby-register">register the node</link> as usual.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="faq-repmgr-failed-primary-standby" xreflabel="Reintegrate a failed primary as a standby">
|
<sect2 id="faq-repmgr-failed-primary-standby" xreflabel="Reintegrate a failed primary as a standby">
|
||||||
<title>How can a failed primary be re-added as a standby?</title>
|
<title>How can a failed primary be re-added as a standby?</title>
|
||||||
@@ -180,6 +215,9 @@
|
|||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="faq-repmgrd" xreflabel="repmgrd">
|
<sect1 id="faq-repmgrd" xreflabel="repmgrd">
|
||||||
|
|||||||
@@ -15,9 +15,124 @@
|
|||||||
See also: <xref linkend="upgrading-repmgr">
|
See also: <xref linkend="upgrading-repmgr">
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<sect1 id="release-4.0.4">
|
||||||
|
<title>Release 4.0.4</title>
|
||||||
|
<para><emphasis>Thu Mar 8, 2018</emphasis></para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
&repmgr; 4.0.4 contains some bug fixes and a number of
|
||||||
|
usability enhancements related to logging/diagnostics,
|
||||||
|
event notifications and pre-action checks.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.3;
|
||||||
|
<application>repmgrd</application> (if running) should be restarted. See <xref linkend="upgrading-repmgr">
|
||||||
|
for more details.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Usability enhancements</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
add <command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
|
||||||
|
option to enable integration of a standby cloned from another source into a &repmgr; cluster (GitHub #382)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
remove restriction on using replication slots when cloning from a Barman server (GitHub #379)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
make <command><link linkend="repmgr-standby-promote">repmgr standby promote</link></command>
|
||||||
|
timeout values configurable (GitHub #387)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
add missing options to main <literal>--help</literal> output (GitHub #391, #392)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Bug fixes</title>
|
||||||
|
<para>
|
||||||
|
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ensure <command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>
|
||||||
|
honours the <option>--dry-run</option> option (GitHub #383)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
improve replication slot warnings generated by
|
||||||
|
<command><link linkend="repmgr-node-status">repmgr node status</link></command>
|
||||||
|
(GitHub #385)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
fix --superuser handling when cloning a standby (GitHub #380)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application>: improve detection of status change from primary to
|
||||||
|
standby
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application>: improve reconnection to the local node after a
|
||||||
|
failover (previously a connection error due to the node starting up was being
|
||||||
|
interpreted as the node being unavailable)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application>: when running on a witness server, correctly connect
|
||||||
|
to new primary after a failover
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<application>repmgrd</application>: add <link linkend="event-notifications">event notification</link>
|
||||||
|
<literal>repmgrd_shutdown</literal> (GitHub #393)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="release-4.0.3">
|
<sect1 id="release-4.0.3">
|
||||||
<title>Release 4.0.3</title>
|
<title>Release 4.0.3</title>
|
||||||
<para><emphasis>??? Feb ??, 2018</emphasis></para>
|
<para><emphasis>Thu Feb 15, 2018</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4.0.3 contains some bug fixes and a number of
|
&repmgr; 4.0.3 contains some bug fixes and a number of
|
||||||
@@ -25,6 +140,10 @@
|
|||||||
event notifications and pre-action checks.
|
event notifications and pre-action checks.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.2;
|
||||||
|
repmgrd (if running) should be restarted.
|
||||||
|
</para>
|
||||||
<sect2>
|
<sect2>
|
||||||
<title>Usability enhancements</title>
|
<title>Usability enhancements</title>
|
||||||
|
|
||||||
@@ -65,16 +184,24 @@
|
|||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
add --dry-run mode to <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
add --dry-run mode to <command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>
|
||||||
(GitHub #369)
|
(GitHub #368)
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
add <literal>standby_register_sync</literal> event notification, which is fired when
|
provide information about the primary node for
|
||||||
|
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command> and
|
||||||
|
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command> event notifications (GitHub #375)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
add <literal>standby_register_sync</literal> <link linkend="event-notifications">event notification</link>, which is fired when
|
||||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
|
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>
|
||||||
is run with the <option>--wait-sync</option> option and the new or updated standby node
|
is run with the <option>--wait-sync</option> option and the new or updated standby node
|
||||||
record has synchronised to the standy (GitHub #374)
|
record has synchronised to the standby (GitHub #374)
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Execution</title>
|
<title>Execution</title>
|
||||||
<para>
|
<para>
|
||||||
Execute with the <literal>--dry-run</literal> option to check what would happen without
|
Execute with the <option>--dry-run</option> option to check what would happen without
|
||||||
actually registering the primary.
|
actually registering the primary.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If providing the configuration file location with <literal>-f/--config-file</literal>,
|
If providing the configuration file location with <option>-f/--config-file</option>,
|
||||||
avoid using a relative path, as &repmgr; stores the configuration file location
|
avoid using a relative path, as &repmgr; stores the configuration file location
|
||||||
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
in the repmgr metadata for use when &repmgr; is executed remotely (e.g. during
|
||||||
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
<xref linkend="repmgr-standby-switchover">). &repmgr; will attempt to convert the
|
||||||
@@ -48,6 +48,33 @@
|
|||||||
</note>
|
</note>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually register the primary.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F, --force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Overwrite an existing node record
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Execution</title>
|
<title>Execution</title>
|
||||||
<para>
|
<para>
|
||||||
<command>repmgr primary unregister</command> should be run on the current primary,
|
<command>repmgr primary unregister</command> can be run on any active &repmgr; node,
|
||||||
with the ID of the node to unregister passed as <option>--node-id</option>.
|
with the ID of the node to unregister passed as <option>--node-id</option>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -100,6 +100,150 @@
|
|||||||
</note>
|
</note>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1 id="repmgr-standby-create-recovery-conf">
|
||||||
|
<title>Using a standby cloned by another method</title>
|
||||||
|
<para>
|
||||||
|
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
||||||
|
<command>barman recover</command> command).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
To integrate the standby as a &repmgr; node, ensure the <filename>repmgr.conf</filename>
|
||||||
|
file is created for the node, then execute the command
|
||||||
|
<command>repmgr standby clone --recovery-conf-only</command>.
|
||||||
|
This will create the <filename>recovery.conf</filename> file needed to attach
|
||||||
|
the node to its upstream, and will also create a replication slot on the
|
||||||
|
upstream node if required.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that the upstream node must be running. An existing
|
||||||
|
<filename>recovery.conf</filename> will not be overwritten unless the
|
||||||
|
<option>-F/--force</option> option is provided.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Execute <command>repmgr standby clone --recovery-conf-only --dry-run</command>
|
||||||
|
to check the prerequisites for creating the <filename>recovery.conf</filename> file,
|
||||||
|
and display the contents of the file without actually creating it.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually clone the standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If <option>--recovery-conf-only</option> is specified, the contents of
|
||||||
|
the generated <filename>recovery.conf</filename> file will be displayed
|
||||||
|
but the file itself will not be written.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-c, --fast-checkpoint</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
force fast checkpoint (not effective when cloning from Barman)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--copy-external-config-files[={samepath|pgdata}]</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
copy configuration files located outside the data directory on the source
|
||||||
|
node to the same path on the standby (default) or to the
|
||||||
|
PostgreSQL data directory.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--no-upstream-connection</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
when using Barman, do not connect to upstream node
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-R, --remote-user=USERNAME</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
remote system username for SSH operations (default: current local system username)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--recovery-conf-only</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
create <filename>recovery.conf</filename> file for a previously cloned instance
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--replication-user</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
user to make replication connections with (optional, not usually required)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--superuser</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
if the &repmgr; user is not a superuser, the name of a valid superuser must
|
||||||
|
be provided with this option
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--upstream-conninfo</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<literal>primary_conninfo</literal> value to write in recovery.conf
|
||||||
|
when the intended upstream server does not yet exist
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--upstream-node-id</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ID of the upstream node to replicate from (optional, defaults to primary node)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--without-barman</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
do not use Barman even if configured
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -30,6 +30,7 @@
|
|||||||
To re-add an inactive node to the replication cluster, see
|
To re-add an inactive node to the replication cluster, see
|
||||||
<xref linkend="repmgr-node-rejoin">
|
<xref linkend="repmgr-node-rejoin">
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -74,7 +75,10 @@
|
|||||||
<term><option>--wait</option></term>
|
<term><option>--wait</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Wait for a primary to appear.
|
Wait for a primary to appear. &repmgr; will wait for up to
|
||||||
|
<varname>primary_follow_timeout</varname> seconds
|
||||||
|
(default: 60 seconds) to verify that the standby is following the new primary.
|
||||||
|
This value can be defined in <filename>repmgr.conf</filename>.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|||||||
@@ -26,6 +26,12 @@
|
|||||||
by using <xref linkend="repmgr-standby-follow">; if <application>repmgrd</application>
|
by using <xref linkend="repmgr-standby-follow">; if <application>repmgrd</application>
|
||||||
is active, it will handle this automatically.
|
is active, it will handle this automatically.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that &repmgr; will wait for up to <varname>promote_check_timeout</varname> seconds
|
||||||
|
(default: 60 seconds) to verify that the standby has been promoted, and will
|
||||||
|
check the promotion every <varname>promote_check_interval</varname> seconds (default: 1 second).
|
||||||
|
Both values can be defined in <filename>repmgr.conf</filename>.
|
||||||
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -42,6 +48,7 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -92,6 +92,73 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1 id="repmgr-standby-register-node-cloned-other-source">
|
||||||
|
<title>Registering a node not cloned by repmgr</title>
|
||||||
|
<para>
|
||||||
|
If you've cloned a standby using another method (e.g. <application>barman</application>'s
|
||||||
|
<command>barman recover</command> command), first execute
|
||||||
|
<link linkend="repmgr-standby-create-recovery-conf">repmgr standby clone --recovery-conf-only</link>
|
||||||
|
to add the <filename>recovery.conf</filename> file, then register the standby as usual.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually register the standby.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F, --force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Overwrite an existing node record
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--upstream-node-id</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
ID of the upstream node to replicate from (optional)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--wait-start</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
wait for the standby to start (timeout in seconds, default 30 seconds)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--wait-sync</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
wait for the node record to synchronise to the standby (optional timeout in seconds)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -43,6 +43,22 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Options</title>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--node-id</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<varname>node_id</varname> of the node to unregister (optional)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -40,7 +40,7 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>repmgrd is monitoring the primary node, but it is not available</simpara>
|
<simpara>repmgrd is monitoring the primary node, but it is not available (and no other node has been promoted as primary)</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
@@ -69,7 +69,15 @@
|
|||||||
By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
|
By default, <literal>repmgrd</literal> will continue in degraded monitoring mode indefinitely.
|
||||||
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
However a timeout (in seconds) can be set with <varname>degraded_monitoring_timeout</varname>,
|
||||||
after which <application>repmgrd</application> will terminate.
|
after which <application>repmgrd</application> will terminate.
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
If <application>repmgrd</application> is monitoring a primary node which has been stopped
|
||||||
|
and manually restarted as a standby attached to a new primary, it will automatically detect
|
||||||
|
the status change and update the node record to reflect the node's new status
|
||||||
|
as an active standby. It will then resume monitoring the node as a standby.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
</chapter>
|
</chapter>
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
<!ENTITY repmgrversion "4.0.3">
|
<!ENTITY repmgrversion "4.0.4">
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ do_node_status(void)
|
|||||||
|
|
||||||
/* Check node exists and is really a standby */
|
/* Check node exists and is really a standby */
|
||||||
|
|
||||||
if (get_node_record(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
if (get_node_record_with_upstream(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
log_error(_("no record found for node %i"), config_file_options.node_id);
|
log_error(_("no record found for node %i"), config_file_options.node_id);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
@@ -308,14 +308,16 @@ do_node_status(void)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* check for missing replication slots - we do this regardless of
|
* check for missing replication slots - we do this regardless of
|
||||||
* what "max_replication_slots" is set to
|
* what "max_replication_slots" is set to, in case the downstream
|
||||||
|
* node was configured with "use_replication_slots=true" and is
|
||||||
|
* expecting a replication slot to be available
|
||||||
*/
|
*/
|
||||||
|
|
||||||
{
|
{
|
||||||
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
get_downsteam_nodes_with_missing_slot(conn,
|
get_downstream_nodes_with_missing_slot(conn,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
&missing_slots);
|
&missing_slots);
|
||||||
|
|
||||||
if (missing_slots.node_count > 0)
|
if (missing_slots.node_count > 0)
|
||||||
{
|
{
|
||||||
@@ -1814,7 +1816,7 @@ do_node_rejoin(void)
|
|||||||
* Forcibly rewind node if requested (this is mainly for use when this
|
* Forcibly rewind node if requested (this is mainly for use when this
|
||||||
* action is being executed by "repmgr standby switchover")
|
* action is being executed by "repmgr standby switchover")
|
||||||
*/
|
*/
|
||||||
if (runtime_options.force_rewind == true)
|
if (runtime_options.force_rewind == true && runtime_options.dry_run == false)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
PQExpBufferData filebuf;
|
PQExpBufferData filebuf;
|
||||||
@@ -1949,6 +1951,12 @@ do_node_rejoin(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("prerequisites for executing NODE REJOIN are met"));
|
||||||
|
exit(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&follow_output);
|
initPQExpBuffer(&follow_output);
|
||||||
|
|
||||||
success = do_standby_follow_internal(upstream_conn,
|
success = do_standby_follow_internal(upstream_conn,
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ static char datadir_list_filename[MAXLEN];
|
|||||||
static char barman_command_buf[MAXLEN] = "";
|
static char barman_command_buf[MAXLEN] = "";
|
||||||
|
|
||||||
static void _do_standby_promote_internal(PGconn *conn, const char *data_dir);
|
static void _do_standby_promote_internal(PGconn *conn, const char *data_dir);
|
||||||
|
static void _do_create_recovery_conf(void);
|
||||||
|
|
||||||
static void check_barman_config(void);
|
static void check_barman_config(void);
|
||||||
static void check_source_server(void);
|
static void check_source_server(void);
|
||||||
@@ -95,9 +96,8 @@ static void get_barman_property(char *dst, char *name, char *local_repmgr_direct
|
|||||||
static int get_tablespace_data_barman(char *, TablespaceDataList *);
|
static int get_tablespace_data_barman(char *, TablespaceDataList *);
|
||||||
static char *make_barman_ssh_command(char *buf);
|
static char *make_barman_ssh_command(char *buf);
|
||||||
|
|
||||||
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, const char *data_dir);
|
static bool create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, char *dest, bool as_file);
|
||||||
static void write_primary_conninfo(char *line, t_conninfo_param_list *param_list);
|
static void write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list);
|
||||||
static bool write_recovery_file_line(FILE *recovery_file, char *recovery_file_path, char *line);
|
|
||||||
|
|
||||||
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
|
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
|
||||||
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold);
|
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold);
|
||||||
@@ -119,6 +119,7 @@ static ConnectionStatus parse_remote_node_replication_connection(const char *nod
|
|||||||
* --recovery-min-apply-delay
|
* --recovery-min-apply-delay
|
||||||
* --replication-user (only required if no upstream record)
|
* --replication-user (only required if no upstream record)
|
||||||
* --without-barman
|
* --without-barman
|
||||||
|
* --recovery-conf-only
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -128,7 +129,15 @@ do_standby_clone(void)
|
|||||||
int r = 0;
|
int r = 0;
|
||||||
|
|
||||||
/* dummy node record */
|
/* dummy node record */
|
||||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* --recovery-conf-only provided - we'll handle that separately
|
||||||
|
*/
|
||||||
|
if (runtime_options.recovery_conf_only == true)
|
||||||
|
{
|
||||||
|
return _do_create_recovery_conf();
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* conninfo params for the actual upstream node (which might be different
|
* conninfo params for the actual upstream node (which might be different
|
||||||
@@ -177,8 +186,8 @@ do_standby_clone(void)
|
|||||||
check_barman_config();
|
check_barman_config();
|
||||||
}
|
}
|
||||||
|
|
||||||
init_node_record(&node_record);
|
init_node_record(&local_node_record);
|
||||||
node_record.type = STANDBY;
|
local_node_record.type = STANDBY;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialise list of conninfo parameters which will later be used to
|
* Initialise list of conninfo parameters which will later be used to
|
||||||
@@ -521,7 +530,7 @@ do_standby_clone(void)
|
|||||||
|
|
||||||
if (mode != barman)
|
if (mode != barman)
|
||||||
{
|
{
|
||||||
initialise_direct_clone(&node_record);
|
initialise_direct_clone(&local_node_record);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (mode)
|
switch (mode)
|
||||||
@@ -545,13 +554,17 @@ do_standby_clone(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mode == pg_basebackup)
|
switch (mode)
|
||||||
{
|
{
|
||||||
r = run_basebackup(&node_record);
|
case pg_basebackup:
|
||||||
}
|
r = run_basebackup(&local_node_record);
|
||||||
else
|
break;
|
||||||
{
|
case barman:
|
||||||
r = run_file_backup(&node_record);
|
r = run_file_backup(&local_node_record);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
/* should never reach here */
|
||||||
|
log_error(_("unknown clone mode"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -561,7 +574,7 @@ do_standby_clone(void)
|
|||||||
/* If a replication slot was previously created, drop it */
|
/* If a replication slot was previously created, drop it */
|
||||||
if (config_file_options.use_replication_slots == true)
|
if (config_file_options.use_replication_slots == true)
|
||||||
{
|
{
|
||||||
drop_replication_slot(source_conn, node_record.slot_name);
|
drop_replication_slot(source_conn, local_node_record.slot_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_error(_("unable to take a base backup of the primary server"));
|
log_error(_("unable to take a base backup of the primary server"));
|
||||||
@@ -588,7 +601,7 @@ do_standby_clone(void)
|
|||||||
|
|
||||||
/* Write the recovery.conf file */
|
/* Write the recovery.conf file */
|
||||||
|
|
||||||
if (create_recovery_file(&node_record, &recovery_conninfo, local_data_directory) == false)
|
if (create_recovery_file(&local_node_record, &recovery_conninfo, local_data_directory, true) == false)
|
||||||
{
|
{
|
||||||
/* create_recovery_file() will log an error */
|
/* create_recovery_file() will log an error */
|
||||||
log_notice(_("unable to create recovery.conf; see preceding error messages"));
|
log_notice(_("unable to create recovery.conf; see preceding error messages"));
|
||||||
@@ -789,6 +802,375 @@ check_barman_config(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* _do_create_recovery_conf()
|
||||||
|
*
|
||||||
|
* Create recovery.conf for a previously cloned instance.
|
||||||
|
*
|
||||||
|
* Prerequisites:
|
||||||
|
*
|
||||||
|
* - data directory must be provided
|
||||||
|
* - the instance should not be running
|
||||||
|
* - an existing "recovery.conf" file can only be overwritten with
|
||||||
|
* -F/--force
|
||||||
|
* - connection parameters for an existing, running node must be provided
|
||||||
|
* - --upstream-node-id, if provided, will be "primary_conninfo",
|
||||||
|
* otherwise primary node id; node must exist; unless -F/--force
|
||||||
|
* provided, must be active and connection possible
|
||||||
|
* - if replication slots in use, create (respect --dry-run)
|
||||||
|
*
|
||||||
|
* not compatible with --no-upstream-connection
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
_do_create_recovery_conf(void)
|
||||||
|
{
|
||||||
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
|
char recovery_file_path[MAXPGPATH] = "";
|
||||||
|
struct stat st;
|
||||||
|
bool node_is_running = false;
|
||||||
|
bool slot_creation_required = false;
|
||||||
|
PGconn *upstream_conn = NULL;
|
||||||
|
PGconn *upstream_repl_conn = NULL;
|
||||||
|
|
||||||
|
get_node_data_directory(local_data_directory);
|
||||||
|
|
||||||
|
if (local_data_directory[0] == '\0')
|
||||||
|
{
|
||||||
|
log_error(_("no data directory provided"));
|
||||||
|
log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do some sanity checks on the data directory to make sure
|
||||||
|
* it contains a valid but dormant instance
|
||||||
|
*/
|
||||||
|
switch (check_dir(local_data_directory))
|
||||||
|
{
|
||||||
|
case DIR_ERROR:
|
||||||
|
log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
break;
|
||||||
|
case DIR_NOENT:
|
||||||
|
log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
break;
|
||||||
|
case DIR_EMPTY:
|
||||||
|
log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
break;
|
||||||
|
case DIR_NOT_EMPTY:
|
||||||
|
/* Present but not empty */
|
||||||
|
if (!is_pg_dir(local_data_directory))
|
||||||
|
{
|
||||||
|
log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_pg_running(local_data_directory))
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false)
|
||||||
|
{
|
||||||
|
log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
|
||||||
|
local_data_directory);
|
||||||
|
log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
node_is_running = true;
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_warning(_("\"recovery.conf\" would be created in an active data directory"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("creating \"recovery.conf\" in an active data directory"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check connection */
|
||||||
|
source_conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||||
|
|
||||||
|
/* determine node for primary_conninfo */
|
||||||
|
|
||||||
|
if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
upstream_node_id = runtime_options.upstream_node_id;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* if --upstream-node-id not specifically supplied, get primary node id */
|
||||||
|
upstream_node_id = get_primary_node_id(source_conn);
|
||||||
|
|
||||||
|
if (upstream_node_id == NODE_NOT_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to determine primary node for this replication cluster"));
|
||||||
|
PQfinish(source_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_debug("primary node determined as: %i", upstream_node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* attempt to retrieve upstream node record */
|
||||||
|
record_status = get_node_record(source_conn,
|
||||||
|
upstream_node_id,
|
||||||
|
&upstream_node_record);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
|
||||||
|
|
||||||
|
if (record_status == RECORD_ERROR)
|
||||||
|
{
|
||||||
|
log_detail("%s", PQerrorMessage(source_conn));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* attempt to retrieve local node record */
|
||||||
|
record_status = get_node_record(source_conn,
|
||||||
|
config_file_options.node_id,
|
||||||
|
&local_node_record);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);
|
||||||
|
|
||||||
|
if (record_status == RECORD_ERROR)
|
||||||
|
{
|
||||||
|
log_detail("%s", PQerrorMessage(source_conn));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(source_conn);
|
||||||
|
|
||||||
|
|
||||||
|
/* connect to upstream (which could be different to source) */
|
||||||
|
|
||||||
|
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
|
||||||
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set the application name to this node's name */
|
||||||
|
if (config_file_options.node_name[0] != '\0')
|
||||||
|
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
||||||
|
|
||||||
|
/* Set the replication user from the primary node record */
|
||||||
|
param_set(&recovery_conninfo, "user", upstream_node_record.repluser);
|
||||||
|
|
||||||
|
initialize_conninfo_params(&recovery_conninfo, false);
|
||||||
|
|
||||||
|
/* We ignore any application_name set in the primary's conninfo */
|
||||||
|
parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);
|
||||||
|
|
||||||
|
/* check that a replication connection can be made (--force = override) */
|
||||||
|
upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(upstream_repl_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if replication slots are in use, perform some checks */
|
||||||
|
if (config_file_options.use_replication_slots == true)
|
||||||
|
{
|
||||||
|
PQExpBufferData msg;
|
||||||
|
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||||
|
|
||||||
|
record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);
|
||||||
|
|
||||||
|
/* check if replication slot exists*/
|
||||||
|
if (record_status == RECORD_FOUND)
|
||||||
|
{
|
||||||
|
if (slot_info.active == true)
|
||||||
|
{
|
||||||
|
initPQExpBuffer(&msg);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&msg,
|
||||||
|
_("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
|
||||||
|
local_node_record.slot_name,
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
if (runtime_options.force == false && runtime_options.dry_run == false)
|
||||||
|
{
|
||||||
|
log_error("%s", msg.data);
|
||||||
|
log_hint(_("use -F/--force to continue anyway"));
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_warning("%s", msg.data);
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info(_("an inactive replication slot for this node exists on the upstream node"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* if not, if check one can and should be created */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
get_node_replication_stats(upstream_conn, UNKNOWN_SERVER_VERSION_NUM, &upstream_node_record);
|
||||||
|
|
||||||
|
if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
|
||||||
|
{
|
||||||
|
slot_creation_required = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
initPQExpBuffer(&msg);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&msg,
|
||||||
|
_("insufficient free replicaiton slots on upstream node \"%s\" (ID: %i)"),
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
|
||||||
|
if (runtime_options.force == false && runtime_options.dry_run == false)
|
||||||
|
{
|
||||||
|
log_error("%s", msg.data);
|
||||||
|
log_hint(_("use -F/--force to continue anyway"));
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_warning("%s", msg.data);
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check if recovery.conf exists */
|
||||||
|
|
||||||
|
maxpath_snprintf(recovery_file_path, "%s/%s", local_data_directory, RECOVERY_COMMAND_FILE);
|
||||||
|
|
||||||
|
if (stat(recovery_file_path, &st) == -1)
|
||||||
|
{
|
||||||
|
if (errno != ENOENT)
|
||||||
|
{
|
||||||
|
log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
|
||||||
|
local_data_directory);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.force == false)
|
||||||
|
{
|
||||||
|
log_error(_("\"recovery.conf\" already exists in \"%s\""),
|
||||||
|
local_data_directory);
|
||||||
|
log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
char recovery_conf_contents[MAXLEN] = "";
|
||||||
|
create_recovery_file(&upstream_node_record, &recovery_conninfo, recovery_conf_contents, false);
|
||||||
|
|
||||||
|
log_info(_("would create \"recovery.conf\" file in \"%s\""), local_data_directory);
|
||||||
|
log_detail(_("\n%s"), recovery_conf_contents);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!create_recovery_file(&upstream_node_record, &recovery_conninfo, local_data_directory, true))
|
||||||
|
{
|
||||||
|
log_error(_("unable to create \"recovery.conf\""));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
|
||||||
|
|
||||||
|
if (node_is_running == true)
|
||||||
|
{
|
||||||
|
log_hint(_("node must be restarted for the new file to take effect"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* add replication slot, if required */
|
||||||
|
if (slot_creation_required == true)
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
|
||||||
|
local_node_record.slot_name,
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PQExpBufferData msg;
|
||||||
|
initPQExpBuffer(&msg);
|
||||||
|
|
||||||
|
if (create_replication_slot(upstream_conn,
|
||||||
|
local_node_record.slot_name,
|
||||||
|
UNKNOWN_SERVER_VERSION_NUM,
|
||||||
|
&msg) == false)
|
||||||
|
{
|
||||||
|
log_error("%s", msg.data);
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&msg);
|
||||||
|
|
||||||
|
log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
|
||||||
|
local_node_record.slot_name,
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* do_standby_register()
|
* do_standby_register()
|
||||||
*
|
*
|
||||||
@@ -1516,10 +1898,8 @@ static void
|
|||||||
_do_standby_promote_internal(PGconn *conn, const char *data_dir)
|
_do_standby_promote_internal(PGconn *conn, const char *data_dir)
|
||||||
{
|
{
|
||||||
char script[MAXLEN];
|
char script[MAXLEN];
|
||||||
int r;
|
int r,
|
||||||
int i,
|
i;
|
||||||
promote_check_timeout = 60,
|
|
||||||
promote_check_interval = 2;
|
|
||||||
bool promote_success = false;
|
bool promote_success = false;
|
||||||
PQExpBufferData details;
|
PQExpBufferData details;
|
||||||
|
|
||||||
@@ -1566,8 +1946,7 @@ _do_standby_promote_internal(PGconn *conn, const char *data_dir)
|
|||||||
exit(ERR_PROMOTION_FAIL);
|
exit(ERR_PROMOTION_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: make these values configurable */
|
for (i = 0; i < config_file_options.promote_check_timeout; i += config_file_options.promote_check_interval)
|
||||||
for (i = 0; i < promote_check_timeout; i += promote_check_interval)
|
|
||||||
{
|
{
|
||||||
recovery_type = get_recovery_type(conn);
|
recovery_type = get_recovery_type(conn);
|
||||||
|
|
||||||
@@ -1576,7 +1955,7 @@ _do_standby_promote_internal(PGconn *conn, const char *data_dir)
|
|||||||
promote_success = true;
|
promote_success = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
sleep(promote_check_interval);
|
sleep(config_file_options.promote_check_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (promote_success == false)
|
if (promote_success == false)
|
||||||
@@ -1595,6 +1974,7 @@ _do_standby_promote_internal(PGconn *conn, const char *data_dir)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_INFO, _("standby promoted to primary after %i second(s)"), i);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Execute a CHECKPOINT as soon as possible after promotion. The primary
|
* Execute a CHECKPOINT as soon as possible after promotion. The primary
|
||||||
@@ -2056,7 +2436,7 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
|
|||||||
log_notice(_("setting node %i's primary to node %i"),
|
log_notice(_("setting node %i's primary to node %i"),
|
||||||
config_file_options.node_id, primary_node_record->node_id);
|
config_file_options.node_id, primary_node_record->node_id);
|
||||||
|
|
||||||
if (!create_recovery_file(&local_node_record, &recovery_conninfo, config_file_options.data_directory))
|
if (!create_recovery_file(&local_node_record, &recovery_conninfo, config_file_options.data_directory, true))
|
||||||
{
|
{
|
||||||
/* XXX ERR_RECOVERY_FILE ??? */
|
/* XXX ERR_RECOVERY_FILE ??? */
|
||||||
*error_code = ERR_BAD_CONFIG;
|
*error_code = ERR_BAD_CONFIG;
|
||||||
@@ -4005,10 +4385,14 @@ check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_in
|
|||||||
|
|
||||||
param_set(&repl_conninfo, "replication", "1");
|
param_set(&repl_conninfo, "replication", "1");
|
||||||
|
|
||||||
if (*runtime_options.replication_user)
|
if (runtime_options.replication_user[0] != '\0')
|
||||||
{
|
{
|
||||||
param_set(&repl_conninfo, "user", runtime_options.replication_user);
|
param_set(&repl_conninfo, "user", runtime_options.replication_user);
|
||||||
}
|
}
|
||||||
|
else if (upstream_repluser[0] != '\0')
|
||||||
|
{
|
||||||
|
param_set(&repl_conninfo, "user", upstream_repluser);
|
||||||
|
}
|
||||||
else if (node_info->repluser[0] != '\0')
|
else if (node_info->repluser[0] != '\0')
|
||||||
{
|
{
|
||||||
param_set(&repl_conninfo, "user", node_info->repluser);
|
param_set(&repl_conninfo, "user", node_info->repluser);
|
||||||
@@ -4060,12 +4444,13 @@ check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_in
|
|||||||
* XXX at this point we could check
|
* XXX at this point we could check
|
||||||
* current_setting('max_wal_senders) - COUNT(*) FROM
|
* current_setting('max_wal_senders) - COUNT(*) FROM
|
||||||
* pg_stat_replication; if >= min_replication_connections we could
|
* pg_stat_replication; if >= min_replication_connections we could
|
||||||
* infer possible authentication error.
|
* infer possible authentication error / lack of permissions.
|
||||||
*
|
*
|
||||||
* Alternatively call PQconnectStart() and poll for
|
* Alternatively call PQconnectStart() and poll for
|
||||||
* presence/absence of CONNECTION_AUTH_OK ?
|
* presence/absence of CONNECTION_AUTH_OK ?
|
||||||
*/
|
*/
|
||||||
log_error(_("unable to establish necessary replication connections"));
|
log_error(_("unable to establish necessary replication connections"));
|
||||||
|
|
||||||
log_hint(_("increase \"max_wal_senders\" by at least %i"),
|
log_hint(_("increase \"max_wal_senders\" by at least %i"),
|
||||||
min_replication_connections - possible_replication_connections);
|
min_replication_connections - possible_replication_connections);
|
||||||
|
|
||||||
@@ -4278,10 +4663,14 @@ run_basebackup(t_node_info *node_record)
|
|||||||
/* string will already have been parsed */
|
/* string will already have been parsed */
|
||||||
(void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false);
|
(void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false);
|
||||||
|
|
||||||
if (*runtime_options.replication_user)
|
if (runtime_options.replication_user[0] != '\0')
|
||||||
{
|
{
|
||||||
param_set(&conninfo, "user", runtime_options.replication_user);
|
param_set(&conninfo, "user", runtime_options.replication_user);
|
||||||
}
|
}
|
||||||
|
else if (upstream_repluser[0] != '\0')
|
||||||
|
{
|
||||||
|
param_set(&conninfo, "user", upstream_repluser);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
param_set(&conninfo, "user", node_record->repluser);
|
param_set(&conninfo, "user", node_record->repluser);
|
||||||
@@ -4314,6 +4703,10 @@ run_basebackup(t_node_info *node_record)
|
|||||||
{
|
{
|
||||||
appendPQExpBuffer(¶ms, " -U %s", runtime_options.replication_user);
|
appendPQExpBuffer(¶ms, " -U %s", runtime_options.replication_user);
|
||||||
}
|
}
|
||||||
|
else if (strlen(upstream_repluser))
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(¶ms, " -U %s", upstream_repluser);
|
||||||
|
}
|
||||||
else if (strlen(node_record->repluser))
|
else if (strlen(node_record->repluser))
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(¶ms, " -U %s", node_record->repluser);
|
appendPQExpBuffer(¶ms, " -U %s", node_record->repluser);
|
||||||
@@ -4768,8 +5161,8 @@ run_file_backup(t_node_info *node_record)
|
|||||||
100000,
|
100000,
|
||||||
90500,
|
90500,
|
||||||
90400, 90400, 90400, 90400, 90400,
|
90400, 90400, 90400, 90400, 90400,
|
||||||
0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0,
|
||||||
0, -100000, 0
|
0, 0, 0, -100000
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; dirs[i]; i++)
|
for (i = 0; dirs[i]; i++)
|
||||||
@@ -4895,7 +5288,7 @@ run_file_backup(t_node_info *node_record)
|
|||||||
if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
|
if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
|
||||||
{
|
{
|
||||||
log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
|
log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
r = ERR_BAD_BASEBACKUP;
|
r = ERR_BAD_BASEBACKUP;
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
}
|
}
|
||||||
@@ -4935,9 +5328,9 @@ run_file_backup(t_node_info *node_record)
|
|||||||
*/
|
*/
|
||||||
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
||||||
{
|
{
|
||||||
log_error(_("unable to remove tablespace_map file %s: %s"),
|
log_error(_("unable to remove tablespace_map file \"%s\""),
|
||||||
tablespace_map_filename.data,
|
tablespace_map_filename.data);
|
||||||
strerror(errno));
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
r = ERR_BAD_BASEBACKUP;
|
r = ERR_BAD_BASEBACKUP;
|
||||||
goto stop_backup;
|
goto stop_backup;
|
||||||
@@ -4971,6 +5364,90 @@ stop_backup:
|
|||||||
rmtree(local_repmgr_tmp_directory, true);
|
rmtree(local_repmgr_tmp_directory, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* if replication slots in use, create replication slot
|
||||||
|
*/
|
||||||
|
if (r == SUCCESS)
|
||||||
|
{
|
||||||
|
if (config_file_options.use_replication_slots == true)
|
||||||
|
{
|
||||||
|
bool slot_warning = false;
|
||||||
|
if (runtime_options.no_upstream_connection == true)
|
||||||
|
{
|
||||||
|
slot_warning = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||||
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
|
PGconn *upstream_conn = NULL;
|
||||||
|
|
||||||
|
record_status = get_node_record(source_conn, upstream_node_id, &upstream_node_record);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
|
||||||
|
slot_warning = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
|
||||||
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to connect to upstream node %i to create a replication slot"), upstream_node_id);
|
||||||
|
slot_warning = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
record_status = get_slot_record(upstream_conn, node_record->slot_name, &slot_info);
|
||||||
|
|
||||||
|
if (record_status == RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_INFO,
|
||||||
|
_("replication slot \"%s\" aleady exists on upstream node %i"),
|
||||||
|
node_record->slot_name,
|
||||||
|
upstream_node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PQExpBufferData errmsg;
|
||||||
|
|
||||||
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
|
if (create_replication_slot(upstream_conn, node_record->slot_name, source_server_version_num, &errmsg) == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create replication slot on upstream node %i"), upstream_node_id);
|
||||||
|
log_detail("%s", errmsg.data);
|
||||||
|
slot_warning = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
|
||||||
|
node_record->slot_name,
|
||||||
|
upstream_node_record.node_name,
|
||||||
|
upstream_node_id );
|
||||||
|
}
|
||||||
|
termPQExpBuffer(&errmsg);
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(upstream_conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (slot_warning == true)
|
||||||
|
{
|
||||||
|
log_warning(_("\"use_replication_slots\" specified but a replication slot could not be created"));
|
||||||
|
log_hint(_("ensure a replication slot called \"%s\" is created on the upstream node (ID: %i)"),
|
||||||
|
node_record->slot_name,
|
||||||
|
upstream_node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5295,40 +5772,19 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
|||||||
* might not be available.
|
* might not be available.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, const char *data_dir)
|
create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_conninfo, char *dest, bool as_file)
|
||||||
{
|
{
|
||||||
FILE *recovery_file;
|
PQExpBufferData recovery_file_buf;
|
||||||
char recovery_file_path[MAXPGPATH] = "";
|
char recovery_file_path[MAXPGPATH] = "";
|
||||||
char line[MAXLEN] = "";
|
FILE *recovery_file;
|
||||||
mode_t um;
|
mode_t um;
|
||||||
|
|
||||||
maxpath_snprintf(recovery_file_path, "%s/%s", data_dir, RECOVERY_COMMAND_FILE);
|
/* create file in buffer */
|
||||||
|
initPQExpBuffer(&recovery_file_buf);
|
||||||
/* Set umask to 0600 */
|
|
||||||
um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
|
|
||||||
recovery_file = fopen(recovery_file_path, "w");
|
|
||||||
umask(um);
|
|
||||||
|
|
||||||
if (recovery_file == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to create recovery.conf file at \"%s\""),
|
|
||||||
recovery_file_path);
|
|
||||||
log_detail("%s", strerror(errno));
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_debug("create_recovery_file(): creating \"%s\"...",
|
|
||||||
recovery_file_path);
|
|
||||||
|
|
||||||
/* standby_mode = 'on' */
|
/* standby_mode = 'on' */
|
||||||
maxlen_snprintf(line, "standby_mode = 'on'\n");
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
|
"standby_mode = 'on'\n");
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
trim(line);
|
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
|
|
||||||
/* primary_conninfo = '...' */
|
/* primary_conninfo = '...' */
|
||||||
|
|
||||||
@@ -5339,8 +5795,10 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_c
|
|||||||
{
|
{
|
||||||
char *escaped = escape_recovery_conf_value(runtime_options.upstream_conninfo);
|
char *escaped = escape_recovery_conf_value(runtime_options.upstream_conninfo);
|
||||||
|
|
||||||
maxlen_snprintf(line, "primary_conninfo = '%s'\n",
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
escaped);
|
"primary_conninfo = '%s'\n",
|
||||||
|
escaped);
|
||||||
|
|
||||||
free(escaped);
|
free(escaped);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5350,84 +5808,90 @@ create_recovery_file(t_node_info *node_record, t_conninfo_param_list *recovery_c
|
|||||||
*/
|
*/
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
write_primary_conninfo(line, recovery_conninfo);
|
write_primary_conninfo(&recovery_file_buf, recovery_conninfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
trim(line);
|
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
|
|
||||||
/* recovery_target_timeline = 'latest' */
|
/* recovery_target_timeline = 'latest' */
|
||||||
maxlen_snprintf(line, "recovery_target_timeline = 'latest'\n");
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
|
"recovery_target_timeline = 'latest'\n");
|
||||||
|
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
trim(line);
|
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
|
|
||||||
/* recovery_min_apply_delay = ... (optional) */
|
/* recovery_min_apply_delay = ... (optional) */
|
||||||
if (config_file_options.recovery_min_apply_delay_provided == true)
|
if (config_file_options.recovery_min_apply_delay_provided == true)
|
||||||
{
|
{
|
||||||
maxlen_snprintf(line, "recovery_min_apply_delay = %s\n",
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
config_file_options.recovery_min_apply_delay);
|
"recovery_min_apply_delay = %s\n",
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
config_file_options.recovery_min_apply_delay);
|
||||||
return false;
|
|
||||||
|
|
||||||
trim(line);
|
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* primary_slot_name = '...' (optional, for 9.4 and later) */
|
/* primary_slot_name = '...' (optional, for 9.4 and later) */
|
||||||
if (config_file_options.use_replication_slots)
|
if (config_file_options.use_replication_slots)
|
||||||
{
|
{
|
||||||
maxlen_snprintf(line, "primary_slot_name = %s\n",
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
node_record->slot_name);
|
"primary_slot_name = %s\n",
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
node_record->slot_name);
|
||||||
return false;
|
|
||||||
|
|
||||||
trim(line);
|
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If restore_command is set, we use it as restore_command in
|
* If restore_command is set, we use it as restore_command in
|
||||||
* recovery.conf
|
* recovery.conf
|
||||||
*/
|
*/
|
||||||
if (strcmp(config_file_options.restore_command, "") != 0)
|
if (config_file_options.restore_command[0] != '\0')
|
||||||
{
|
{
|
||||||
maxlen_snprintf(line, "restore_command = '%s'\n",
|
char *escaped = escape_recovery_conf_value(config_file_options.restore_command);
|
||||||
config_file_options.restore_command);
|
|
||||||
if (write_recovery_file_line(recovery_file, recovery_file_path, line) == false)
|
appendPQExpBuffer(&recovery_file_buf,
|
||||||
|
"restore_command = '%s'\n",
|
||||||
|
escaped);
|
||||||
|
free(escaped);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (as_file == true)
|
||||||
|
{
|
||||||
|
maxpath_snprintf(recovery_file_path, "%s/%s", dest, RECOVERY_COMMAND_FILE);
|
||||||
|
log_debug("create_recovery_file(): creating \"%s\"...",
|
||||||
|
recovery_file_path);
|
||||||
|
|
||||||
|
/* Set umask to 0600 */
|
||||||
|
um = umask((~(S_IRUSR | S_IWUSR)) & (S_IRWXG | S_IRWXO));
|
||||||
|
recovery_file = fopen(recovery_file_path, "w");
|
||||||
|
umask(um);
|
||||||
|
|
||||||
|
if (recovery_file == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create recovery.conf file at \"%s\""),
|
||||||
|
recovery_file_path);
|
||||||
|
log_detail("%s", strerror(errno));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
trim(line);
|
log_debug("recovery file is:\n%s", recovery_file_buf.data);
|
||||||
log_debug("recovery.conf: %s", line);
|
|
||||||
}
|
|
||||||
fclose(recovery_file);
|
|
||||||
|
|
||||||
return true;
|
if (fputs(recovery_file_buf.data, recovery_file) == EOF)
|
||||||
}
|
{
|
||||||
|
log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
|
||||||
|
fclose(recovery_file);
|
||||||
|
termPQExpBuffer(&recovery_file_buf);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool
|
|
||||||
write_recovery_file_line(FILE *recovery_file, char *recovery_file_path, char *line)
|
|
||||||
{
|
|
||||||
if (fputs(line, recovery_file) == EOF)
|
|
||||||
{
|
|
||||||
log_error(_("unable to write to recovery file at \"%s\""), recovery_file_path);
|
|
||||||
fclose(recovery_file);
|
fclose(recovery_file);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(dest, "%s", recovery_file_buf.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&recovery_file_buf);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
write_primary_conninfo(char *line, t_conninfo_param_list *param_list)
|
write_primary_conninfo(PQExpBufferData *dest, t_conninfo_param_list *param_list)
|
||||||
{
|
{
|
||||||
PQExpBufferData conninfo_buf;
|
PQExpBufferData conninfo_buf;
|
||||||
bool application_name_provided = false;
|
bool application_name_provided = false;
|
||||||
@@ -5509,7 +5973,9 @@ write_primary_conninfo(char *line, t_conninfo_param_list *param_list)
|
|||||||
}
|
}
|
||||||
|
|
||||||
escaped = escape_recovery_conf_value(conninfo_buf.data);
|
escaped = escape_recovery_conf_value(conninfo_buf.data);
|
||||||
maxlen_snprintf(line, "primary_conninfo = '%s'\n", escaped);
|
|
||||||
|
appendPQExpBuffer(dest,
|
||||||
|
"primary_conninfo = '%s'\n", escaped);
|
||||||
|
|
||||||
free(escaped);
|
free(escaped);
|
||||||
free_conninfo_params(&env_conninfo);
|
free_conninfo_params(&env_conninfo);
|
||||||
@@ -5771,6 +6237,8 @@ do_standby_help(void)
|
|||||||
" when the intended upstream server does not yet exist\n"));
|
" when the intended upstream server does not yet exist\n"));
|
||||||
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
|
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
|
||||||
printf(_(" --without-barman do not use Barman even if configured\n"));
|
printf(_(" --without-barman do not use Barman even if configured\n"));
|
||||||
|
printf(_(" --recovery-conf-only create \"recovery.conf\" file for a previously cloned instance\n"));
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("STANDBY REGISTER\n"));
|
printf(_("STANDBY REGISTER\n"));
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ typedef struct
|
|||||||
char replication_user[MAXLEN];
|
char replication_user[MAXLEN];
|
||||||
char upstream_conninfo[MAXLEN];
|
char upstream_conninfo[MAXLEN];
|
||||||
bool without_barman;
|
bool without_barman;
|
||||||
|
bool recovery_conf_only;
|
||||||
|
|
||||||
/* "standby clone"/"standby follow" options */
|
/* "standby clone"/"standby follow" options */
|
||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
@@ -138,14 +139,14 @@ typedef struct
|
|||||||
/* output options */ \
|
/* output options */ \
|
||||||
false, false, false, \
|
false, false, false, \
|
||||||
/* database connection options */ \
|
/* database connection options */ \
|
||||||
"", "", "", "", \
|
"", "", "", "", \
|
||||||
/* other connection options */ \
|
/* other connection options */ \
|
||||||
"", "", \
|
"", "", \
|
||||||
/* general node options */ \
|
/* general node options */ \
|
||||||
UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
|
UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
|
||||||
/* "standby clone" options */ \
|
/* "standby clone" options */ \
|
||||||
false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
|
false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
|
||||||
false, \
|
false, false, \
|
||||||
/* "standby clone"/"standby follow" options */ \
|
/* "standby clone"/"standby follow" options */ \
|
||||||
NO_UPSTREAM_NODE, \
|
NO_UPSTREAM_NODE, \
|
||||||
/* "standby register" options */ \
|
/* "standby register" options */ \
|
||||||
@@ -164,7 +165,7 @@ typedef struct
|
|||||||
false, "", CLUSTER_EVENT_LIMIT, \
|
false, "", CLUSTER_EVENT_LIMIT, \
|
||||||
/* "cluster cleanup" options */ \
|
/* "cluster cleanup" options */ \
|
||||||
0, \
|
0, \
|
||||||
/* Following options for internal use */ \
|
/* following options for internal use */ \
|
||||||
"/tmp", OM_TEXT \
|
"/tmp", OM_TEXT \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
106
repmgr-client.c
106
repmgr-client.c
@@ -176,7 +176,7 @@ main(int argc, char **argv)
|
|||||||
strncpy(runtime_options.username, pw->pw_name, MAXLEN);
|
strncpy(runtime_options.username, pw->pw_name, MAXLEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv, "?Vb:f:FWd:h:p:U:R:S:D:ckL:tvC:", long_options,
|
while ((c = getopt_long(argc, argv, "?Vb:f:FWd:h:p:U:R:S:D:ck:L:tvC:", long_options,
|
||||||
&optindex)) != -1)
|
&optindex)) != -1)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@@ -388,6 +388,11 @@ main(int argc, char **argv)
|
|||||||
runtime_options.without_barman = true;
|
runtime_options.without_barman = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_RECOVERY_CONF_ONLY:
|
||||||
|
runtime_options.recovery_conf_only = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
|
||||||
/*---------------------------
|
/*---------------------------
|
||||||
* "standby register" options
|
* "standby register" options
|
||||||
*---------------------------
|
*---------------------------
|
||||||
@@ -993,31 +998,10 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* check for conflicts between runtime options and configuration file */
|
|
||||||
/* ================================================================== */
|
|
||||||
|
|
||||||
if (action == STANDBY_CLONE)
|
|
||||||
{
|
|
||||||
standy_clone_mode mode = get_standby_clone_mode();
|
|
||||||
|
|
||||||
if (mode == barman && runtime_options.without_barman == false
|
|
||||||
&& config_file_options.use_replication_slots == true)
|
|
||||||
{
|
|
||||||
log_error(_("STANDBY CLONE in Barman mode is incompatible with configuration option \"use_replication_slots\""));
|
|
||||||
log_hint(_("set \"use_replication_slots\" to \"no\" in repmgr.conf, or use --without-barman to clone directly from the upstream server"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for configuration file items which can be overridden by runtime
|
* Check for configuration file items which can be overridden by runtime
|
||||||
* options
|
* options
|
||||||
*/
|
* =====================================================================
|
||||||
|
|
||||||
/*
|
|
||||||
* ============================================================================
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1495,6 +1479,39 @@ check_cli_parameters(const int action)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.replication_user[0])
|
||||||
|
{
|
||||||
|
switch (action)
|
||||||
|
{
|
||||||
|
case PRIMARY_REGISTER:
|
||||||
|
case STANDBY_REGISTER:
|
||||||
|
case STANDBY_CLONE:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case STANDBY_FOLLOW:
|
||||||
|
item_list_append_format(&cli_warnings,
|
||||||
|
_("--replication-user ignored when executing %s"),
|
||||||
|
action_name(action));
|
||||||
|
default:
|
||||||
|
item_list_append_format(&cli_warnings,
|
||||||
|
_("--replication-user not required when executing %s"),
|
||||||
|
action_name(action));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (runtime_options.recovery_conf_only == true)
|
||||||
|
{
|
||||||
|
switch (action)
|
||||||
|
{
|
||||||
|
case STANDBY_CLONE:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
item_list_append_format(&cli_warnings,
|
||||||
|
_("--create-recovery-conf will be ignored when executing %s"),
|
||||||
|
action_name(action));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.event[0])
|
if (runtime_options.event[0])
|
||||||
{
|
{
|
||||||
switch (action)
|
switch (action)
|
||||||
@@ -1508,25 +1525,6 @@ check_cli_parameters(const int action)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.replication_user[0])
|
|
||||||
{
|
|
||||||
switch (action)
|
|
||||||
{
|
|
||||||
case PRIMARY_REGISTER:
|
|
||||||
case STANDBY_REGISTER:
|
|
||||||
break;
|
|
||||||
case STANDBY_CLONE:
|
|
||||||
case STANDBY_FOLLOW:
|
|
||||||
item_list_append_format(&cli_warnings,
|
|
||||||
_("--replication-user ignored when executing %s)"),
|
|
||||||
action_name(action));
|
|
||||||
default:
|
|
||||||
item_list_append_format(&cli_warnings,
|
|
||||||
_("--replication-user not required when executing %s"),
|
|
||||||
action_name(action));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (runtime_options.limit_provided)
|
if (runtime_options.limit_provided)
|
||||||
{
|
{
|
||||||
switch (action)
|
switch (action)
|
||||||
@@ -1777,10 +1775,11 @@ do_help(void)
|
|||||||
|
|
||||||
printf(_("Usage:\n"));
|
printf(_("Usage:\n"));
|
||||||
printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
|
printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow}\n"), progname());
|
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
|
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] node status\n"), progname());
|
printf(_(" %s [OPTIONS] node {status|check|rejoin|service}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
|
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck}\n"), progname());
|
||||||
|
printf(_(" %s [OPTIONS] witness {register|unregister}\n"), progname());
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
@@ -2151,10 +2150,19 @@ local_command(const char *command, PQExpBufferData *outputbuf)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get_superuser_connection()
|
||||||
|
*
|
||||||
|
* Check if provided connection "conn" is a superuser connection, if not attempt to
|
||||||
|
* make a superuser connection "superuser_conn" with the provided --superuser parameter.
|
||||||
|
*
|
||||||
|
* "privileged_conn" is set to whichever connection is the superuser connection.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn)
|
get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privileged_conn)
|
||||||
{
|
{
|
||||||
t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
|
t_connection_user userinfo = T_CONNECTION_USER_INITIALIZER;
|
||||||
|
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||||
bool is_superuser = false;
|
bool is_superuser = false;
|
||||||
|
|
||||||
/* this should never happen */
|
/* this should never happen */
|
||||||
@@ -2180,9 +2188,11 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
*superuser_conn = establish_db_connection_as_user(config_file_options.conninfo,
|
initialize_conninfo_params(&conninfo_params, false);
|
||||||
runtime_options.superuser,
|
conn_to_param_list(*conn, &conninfo_params);
|
||||||
false);
|
param_set(&conninfo_params, "user", runtime_options.superuser);
|
||||||
|
|
||||||
|
*superuser_conn = establish_db_connection_by_params(&conninfo_params, false);
|
||||||
|
|
||||||
if (PQstatus(*superuser_conn) != CONNECTION_OK)
|
if (PQstatus(*superuser_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -2707,7 +2717,7 @@ init_node_record(t_node_info *node_record)
|
|||||||
|
|
||||||
if (config_file_options.replication_user[0] != '\0')
|
if (config_file_options.replication_user[0] != '\0')
|
||||||
{
|
{
|
||||||
/* replication user explicitly provided */
|
/* replication user explicitly provided in configuration file */
|
||||||
strncpy(node_record->repluser, config_file_options.replication_user, NAMEDATALEN);
|
strncpy(node_record->repluser, config_file_options.replication_user, NAMEDATALEN);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -85,6 +85,7 @@
|
|||||||
#define OPT_WAIT_START 1036
|
#define OPT_WAIT_START 1036
|
||||||
#define OPT_REPL_CONN 1037
|
#define OPT_REPL_CONN 1037
|
||||||
#define OPT_REMOTE_NODE_ID 1038
|
#define OPT_REMOTE_NODE_ID 1038
|
||||||
|
#define OPT_RECOVERY_CONF_ONLY 1039
|
||||||
|
|
||||||
/* deprecated since 3.3 */
|
/* deprecated since 3.3 */
|
||||||
#define OPT_DATA_DIR 999
|
#define OPT_DATA_DIR 999
|
||||||
@@ -139,6 +140,7 @@ static struct option long_options[] =
|
|||||||
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
|
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
|
||||||
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
|
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
|
||||||
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
|
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
|
||||||
|
{"recovery-conf-only", no_argument, NULL, OPT_RECOVERY_CONF_ONLY},
|
||||||
|
|
||||||
/* "standby register" options */
|
/* "standby register" options */
|
||||||
{"wait-start", required_argument, NULL, OPT_WAIT_START},
|
{"wait-start", required_argument, NULL, OPT_WAIT_START},
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# Standby clone settings
|
# "standby clone" settings
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
#
|
#
|
||||||
# These settings apply when cloning a standby ("repmgr standby clone").
|
# These settings apply when cloning a standby ("repmgr standby clone").
|
||||||
@@ -178,6 +178,20 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#restore_command='' # This will be placed in the recovery.conf
|
#restore_command='' # This will be placed in the recovery.conf
|
||||||
# file generated by repmgr
|
# file generated by repmgr
|
||||||
|
|
||||||
|
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
# "standby promote" settings
|
||||||
|
#------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# These settings apply when instructing a standby to promote itself to the
|
||||||
|
# new primary ("repmgr standby promote").
|
||||||
|
|
||||||
|
#promote_check_timeout=60 # The length of time (in seconds) to wait
|
||||||
|
# for the new primary to finish promoting
|
||||||
|
#promote_check_interval=1 # The interval (in seconds) to check whether
|
||||||
|
# the new primary has finished promoting
|
||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# Standby follow settings
|
# Standby follow settings
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
@@ -235,6 +249,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby
|
#primary_notification_timeout=60 # Interval (in seconds) which repmgrd on a standby
|
||||||
# will wait for a notification from the new primary,
|
# will wait for a notification from the new primary,
|
||||||
# before falling back to degraded monitoring
|
# before falling back to degraded monitoring
|
||||||
|
#standby_reconnect_timeout=60 # Interval (in seconds) which repmgrd on a standby will wait
|
||||||
|
# to reconnect to the local node after executing "follow_command"
|
||||||
|
|
||||||
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
||||||
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
||||||
|
|||||||
3
repmgr.h
3
repmgr.h
@@ -77,6 +77,9 @@
|
|||||||
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
|
#define DEFAULT_REPLICATION_LAG_CRITICAL 600 /* seconds */
|
||||||
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
|
#define DEFAULT_WITNESS_SYNC_INTERVAL 15 /* seconds */
|
||||||
#define DEFAULT_WAIT_START 30 /* seconds */
|
#define DEFAULT_WAIT_START 30 /* seconds */
|
||||||
|
#define DEFAULT_PROMOTE_CHECK_TIMEOUT 60 /* seconds */
|
||||||
|
#define DEFAULT_PROMOTE_CHECK_INTERVAL 1 /* seconds */
|
||||||
|
#define DEFAULT_STANDBY_RECONNECT_TIMEOUT 60 /* seconds */
|
||||||
|
|
||||||
#ifndef RECOVERY_COMMAND_FILE
|
#ifndef RECOVERY_COMMAND_FILE
|
||||||
#define RECOVERY_COMMAND_FILE "recovery.conf"
|
#define RECOVERY_COMMAND_FILE "recovery.conf"
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
#define REPMGR_VERSION_DATE ""
|
#define REPMGR_VERSION_DATE ""
|
||||||
#define REPMGR_VERSION "4.0.3"
|
#define REPMGR_VERSION "4.0.4"
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,29 @@ do_bdr_node_check(void)
|
|||||||
/* nothing to do at the moment */
|
/* nothing to do at the moment */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
handle_sigint_bdr(SIGNAL_ARGS)
|
||||||
|
{
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
"%s signal received",
|
||||||
|
postgres_signal_arg == SIGTERM
|
||||||
|
? "TERM" : "INT");
|
||||||
|
|
||||||
|
create_event_notification(local_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_shutdown",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
terminate(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
monitor_bdr(void)
|
monitor_bdr(void)
|
||||||
@@ -98,23 +121,6 @@ monitor_bdr(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Retrieve record for this node from the local database */
|
|
||||||
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Terminate if we can't find the local node record. This is a
|
|
||||||
* "fix-the-config" situation, not a lot else we can do.
|
|
||||||
*/
|
|
||||||
if (record_status != RECORD_FOUND)
|
|
||||||
{
|
|
||||||
log_error(_("unable to retrieve record for local node (ID: %i), terminating"),
|
|
||||||
local_node_info.node_id);
|
|
||||||
log_hint(_("check that \"repmgr bdr register\" was executed for this node"));
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (local_node_info.active == false)
|
if (local_node_info.active == false)
|
||||||
{
|
{
|
||||||
log_error(_("local node (ID: %i) is marked as inactive in repmgr"),
|
log_error(_("local node (ID: %i) is marked as inactive in repmgr"),
|
||||||
@@ -152,15 +158,16 @@ monitor_bdr(void)
|
|||||||
cell->node_info->node_status = NODE_STATUS_UP;
|
cell->node_info->node_status = NODE_STATUS_UP;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("main_loop_bdr() monitoring local node %i", config_file_options.node_id);
|
log_info(_("starting continuous BDR node monitoring on node %i"),
|
||||||
|
config_file_options.node_id);
|
||||||
|
|
||||||
log_info(_("starting continuous BDR node monitoring"));
|
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
||||||
|
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
|
||||||
/* monitoring loop */
|
/* monitoring loop */
|
||||||
log_verbose(LOG_DEBUG, "BDR check loop...");
|
log_verbose(LOG_DEBUG, "BDR check loop - checking %i nodes", nodes.node_count);
|
||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
@@ -262,7 +269,6 @@ loop:
|
|||||||
if (config_file_options.log_status_interval > 0)
|
if (config_file_options.log_status_interval > 0)
|
||||||
{
|
{
|
||||||
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
|
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
|
||||||
|
|
||||||
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
|
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
|
||||||
{
|
{
|
||||||
log_info(_("monitoring BDR replication status on node \"%s\" (ID: %i)"),
|
log_info(_("monitoring BDR replication status on node \"%s\" (ID: %i)"),
|
||||||
@@ -273,8 +279,7 @@ loop:
|
|||||||
{
|
{
|
||||||
if (cell->node_info->monitoring_state == MS_DEGRADED)
|
if (cell->node_info->monitoring_state == MS_DEGRADED)
|
||||||
{
|
{
|
||||||
log_detail(
|
log_detail(_("monitoring node \"%s\" (ID: %i) in degraded mode"),
|
||||||
_("monitoring node \"%s\" (ID: %i) in degraded mode"),
|
|
||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,4 +22,5 @@
|
|||||||
extern void do_bdr_node_check(void);
|
extern void do_bdr_node_check(void);
|
||||||
extern void monitor_bdr(void);
|
extern void monitor_bdr(void);
|
||||||
|
|
||||||
|
extern void handle_sigint_bdr(SIGNAL_ARGS);
|
||||||
#endif /* _REPMGRD_BDR_H_ */
|
#endif /* _REPMGRD_BDR_H_ */
|
||||||
|
|||||||
@@ -85,6 +85,36 @@ static void update_monitoring_history(void);
|
|||||||
static const char * format_failover_state(FailoverState failover_state);
|
static const char * format_failover_state(FailoverState failover_state);
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
handle_sigint_physical(SIGNAL_ARGS)
|
||||||
|
{
|
||||||
|
PGconn *writeable_conn;
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
"%s signal received",
|
||||||
|
postgres_signal_arg == SIGTERM
|
||||||
|
? "TERM" : "INT");
|
||||||
|
|
||||||
|
if (local_node_info.type == PRIMARY)
|
||||||
|
writeable_conn = local_conn;
|
||||||
|
else
|
||||||
|
writeable_conn = primary_conn;
|
||||||
|
|
||||||
|
create_event_notification(writeable_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_shutdown",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
terminate(SUCCESS);
|
||||||
|
}
|
||||||
|
|
||||||
/* perform some sanity checks on the node's configuration */
|
/* perform some sanity checks on the node's configuration */
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -113,6 +143,14 @@ do_physical_node_check(void)
|
|||||||
log_error(_("this node is marked as inactive and cannot be used as a failover target"));
|
log_error(_("this node is marked as inactive and cannot be used as a failover target"));
|
||||||
log_hint(_("%s"), hint);
|
log_hint(_("%s"), hint);
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
create_event_notification(NULL,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_shutdown",
|
||||||
|
false,
|
||||||
|
"node is inactive and cannot be used as a failover target");
|
||||||
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
|
|
||||||
case FAILOVER_MANUAL:
|
case FAILOVER_MANUAL:
|
||||||
@@ -280,6 +318,7 @@ monitor_streaming_primary(void)
|
|||||||
|
|
||||||
monitoring_state = MS_DEGRADED;
|
monitoring_state = MS_DEGRADED;
|
||||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
||||||
|
log_notice(_("unable to connect to local node, falling back to degraded monitoring"));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -303,7 +342,7 @@ monitor_streaming_primary(void)
|
|||||||
create_event_notification(NULL,
|
create_event_notification(NULL,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
"repmgrd_terminate",
|
"repmgrd_shutdown",
|
||||||
true,
|
true,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
@@ -325,22 +364,20 @@ monitor_streaming_primary(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
local_node_info.node_status = NODE_STATUS_UP;
|
local_node_info.node_status = NODE_STATUS_UP;
|
||||||
monitoring_state = MS_NORMAL;
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
/* check to see if the node has been restored as a standby */
|
/* check to see if the node has been restored as a standby */
|
||||||
if (get_recovery_type(local_conn) == RECTYPE_STANDBY)
|
if (get_recovery_type(local_conn) == RECTYPE_STANDBY)
|
||||||
{
|
{
|
||||||
PGconn *new_primary_conn;
|
PGconn *new_primary_conn;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"),
|
_("reconnected to node after %i seconds, node is now a standby, switching to standby monitoring"),
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
|
||||||
primary_node_id = UNKNOWN_NODE_ID;
|
primary_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
new_primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
new_primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
||||||
@@ -353,54 +390,103 @@ monitor_streaming_primary(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
RecordStatus record_status;
|
RecordStatus record_status;
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
log_debug("primary node id is now %i", primary_node_id);
|
log_debug("primary node id is now %i", primary_node_id);
|
||||||
|
|
||||||
/*
|
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||||
* poll for a while until record type is returned as "STANDBY" - it's possible
|
|
||||||
* that there's a gap between the server being restarted and the record
|
|
||||||
* being updated
|
|
||||||
*/
|
|
||||||
for (i = 0; i < 30; i++)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* try and refresh the local node record from the primary, as the updated
|
|
||||||
* local node record may not have been replicated yet
|
|
||||||
*/
|
|
||||||
|
|
||||||
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
|
||||||
{
|
|
||||||
log_debug("type = %s", get_node_type_string(local_node_info.type));
|
|
||||||
|
|
||||||
if (local_node_info.type == STANDBY)
|
|
||||||
{
|
|
||||||
PQfinish(new_primary_conn);
|
|
||||||
|
|
||||||
/* XXX add event notification */
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sleep(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
if (record_status == RECORD_FOUND)
|
||||||
{
|
{
|
||||||
log_warning(_("repmgr node record is still %s"), get_node_type_string(local_node_info.type));
|
bool resume_monitoring = true;
|
||||||
|
|
||||||
|
log_debug("node %i is registered with type = %s",
|
||||||
|
config_file_options.node_id,
|
||||||
|
get_node_type_string(local_node_info.type));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* node has recovered but metadata not updated - we can do that ourselves,
|
||||||
|
*/
|
||||||
|
if (local_node_info.type == PRIMARY)
|
||||||
|
{
|
||||||
|
log_notice(_("node \"%s\" (ID: %i) still registered as primary, setting to standby"),
|
||||||
|
config_file_options.node_name,
|
||||||
|
config_file_options.node_id);
|
||||||
|
|
||||||
|
if (update_node_record_set_active_standby(new_primary_conn, config_file_options.node_id) == false)
|
||||||
|
{
|
||||||
|
resume_monitoring = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
record_status = get_node_record(new_primary_conn, config_file_options.node_id, &local_node_info);
|
||||||
|
|
||||||
|
if (record_status != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
resume_monitoring = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (resume_monitoring == true)
|
||||||
|
{
|
||||||
|
monitoring_state = MS_NORMAL;
|
||||||
|
log_notice(_("former primary has been restored as standby after %i seconds, updating node record and resuming monitoring"),
|
||||||
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("node restored as standby after %i seconds, monitoring connection to upstream node %i"),
|
||||||
|
degraded_monitoring_elapsed,
|
||||||
|
local_node_info.upstream_node_id);
|
||||||
|
|
||||||
|
create_event_notification(new_primary_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_standby_reconnect",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
PQfinish(new_primary_conn);
|
||||||
|
|
||||||
|
/* restart monitoring as standby */
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else if (record_status == RECORD_NOT_FOUND)
|
||||||
{
|
{
|
||||||
log_error(_("no metadata record found for this node"));
|
PQExpBufferData event_details;
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("no metadata record found for this node on current primary %i"),
|
||||||
|
primary_node_id);
|
||||||
|
|
||||||
|
log_error("%s", event_details.data);
|
||||||
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
||||||
|
|
||||||
|
PQfinish(new_primary_conn);
|
||||||
|
|
||||||
|
create_event_notification(NULL,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_shutdown",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
monitoring_state = MS_NORMAL;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to primary node after %i seconds, resuming monitoring"),
|
_("reconnected to primary node after %i seconds, resuming monitoring"),
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
@@ -441,7 +527,7 @@ loop:
|
|||||||
|
|
||||||
if (monitoring_state == MS_DEGRADED)
|
if (monitoring_state == MS_DEGRADED)
|
||||||
{
|
{
|
||||||
log_detail(_("waiting for primary to reappear"));
|
log_detail(_("waiting for the node to become available"));
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
||||||
@@ -472,8 +558,6 @@ loop:
|
|||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
|
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
|
||||||
config_file_options.monitor_interval_secs);
|
config_file_options.monitor_interval_secs);
|
||||||
|
|
||||||
@@ -1009,6 +1093,9 @@ loop:
|
|||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
|
||||||
|
config_file_options.monitor_interval_secs);
|
||||||
|
|
||||||
sleep(config_file_options.monitor_interval_secs);
|
sleep(config_file_options.monitor_interval_secs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1029,9 +1116,26 @@ monitor_streaming_witness(void)
|
|||||||
|
|
||||||
if (get_primary_node_record(local_conn, &upstream_node_info) == false)
|
if (get_primary_node_record(local_conn, &upstream_node_info) == false)
|
||||||
{
|
{
|
||||||
log_error(_("unable to retrieve record for primary node"));
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
_("unable to retrieve record for primary node"));
|
||||||
|
|
||||||
|
log_error("%s", event_details.data);
|
||||||
log_hint(_("execute \"repmgr witness register --force\" to update the witness node "));
|
log_hint(_("execute \"repmgr witness register --force\" to update the witness node "));
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
create_event_notification(NULL,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"repmgrd_shutdown",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1339,6 +1443,9 @@ loop:
|
|||||||
got_SIGHUP = false;
|
got_SIGHUP = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "sleeping %i seconds (parameter \"monitor_interval_secs\")",
|
||||||
|
config_file_options.monitor_interval_secs);
|
||||||
|
|
||||||
sleep(config_file_options.monitor_interval_secs);
|
sleep(config_file_options.monitor_interval_secs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1376,7 +1483,7 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
failover_state = promote_self();
|
failover_state = promote_self();
|
||||||
}
|
}
|
||||||
else if (election_result == ELECTION_LOST)
|
else if (election_result == ELECTION_LOST || election_result == ELECTION_NOT_CANDIDATE)
|
||||||
{
|
{
|
||||||
log_info(_("follower node awaiting notification from the candidate node"));
|
log_info(_("follower node awaiting notification from the candidate node"));
|
||||||
failover_state = FAILOVER_STATE_WAITING_NEW_PRIMARY;
|
failover_state = FAILOVER_STATE_WAITING_NEW_PRIMARY;
|
||||||
@@ -1392,7 +1499,7 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
/* TODO: rerun election if new primary doesn't appear after timeout */
|
/* TODO: rerun election if new primary doesn't appear after timeout */
|
||||||
|
|
||||||
/* either follow or time out; either way resume monitoring */
|
/* either follow, self-promote or time out; either way resume monitoring */
|
||||||
if (wait_primary_notification(&new_primary_id) == true)
|
if (wait_primary_notification(&new_primary_id) == true)
|
||||||
{
|
{
|
||||||
/* if primary has reappeared, no action needed */
|
/* if primary has reappeared, no action needed */
|
||||||
@@ -1443,11 +1550,9 @@ do_primary_failover(void)
|
|||||||
&config_file_options,
|
&config_file_options,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
"standby_disconnect_manual",
|
"standby_disconnect_manual",
|
||||||
|
/*
|
||||||
/*
|
* here "true" indicates the action has occurred as expected
|
||||||
* here "true" indicates the action has occurred as
|
*/
|
||||||
* expected
|
|
||||||
*/
|
|
||||||
true,
|
true,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_primary_conn);
|
||||||
@@ -1666,7 +1771,7 @@ do_upstream_standby_failover(void)
|
|||||||
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
t_node_info primary_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
RecoveryType primary_type = RECTYPE_UNKNOWN;
|
RecoveryType primary_type = RECTYPE_UNKNOWN;
|
||||||
int r;
|
int i, r;
|
||||||
char parsed_follow_command[MAXPGPATH] = "";
|
char parsed_follow_command[MAXPGPATH] = "";
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -1751,8 +1856,30 @@ do_upstream_standby_failover(void)
|
|||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* reconnect to local node */
|
/*
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
* It's possible that the standby is still starting up after the "follow_command"
|
||||||
|
* completes, so poll for a while until we get a connection.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
||||||
|
{
|
||||||
|
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
|
break;
|
||||||
|
|
||||||
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
|
i + 1,
|
||||||
|
config_file_options.standby_reconnect_timeout);
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to reconnect to local node %i"),
|
||||||
|
local_node_info.node_id);
|
||||||
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
/* refresh shared memory settings which will have been zapped by the restart */
|
/* refresh shared memory settings which will have been zapped by the restart */
|
||||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||||
@@ -2032,7 +2159,7 @@ follow_new_primary(int new_primary_id)
|
|||||||
char parsed_follow_command[MAXPGPATH] = "";
|
char parsed_follow_command[MAXPGPATH] = "";
|
||||||
|
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
int r;
|
int i, r;
|
||||||
|
|
||||||
/* Store details of the failed node here */
|
/* Store details of the failed node here */
|
||||||
t_node_info failed_primary = T_NODE_INFO_INITIALIZER;
|
t_node_info failed_primary = T_NODE_INFO_INITIALIZER;
|
||||||
@@ -2144,8 +2271,6 @@ follow_new_primary(int new_primary_id)
|
|||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* refresh local copy of local and primary node records - we get these
|
* refresh local copy of local and primary node records - we get these
|
||||||
* directly from the primary to ensure they're the current version
|
* directly from the primary to ensure they're the current version
|
||||||
@@ -2168,7 +2293,30 @@ follow_new_primary(int new_primary_id)
|
|||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
|
|
||||||
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
/*
|
||||||
|
* It's possible that the standby is still starting up after the "follow_command"
|
||||||
|
* completes, so poll for a while until we get a connection.
|
||||||
|
*/
|
||||||
|
|
||||||
|
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
||||||
|
{
|
||||||
|
local_conn = establish_db_connection(local_node_info.conninfo, false);
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
|
break;
|
||||||
|
|
||||||
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
|
i + 1,
|
||||||
|
config_file_options.standby_reconnect_timeout);
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to reconnect to local node %i"),
|
||||||
|
local_node_info.node_id);
|
||||||
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
|
}
|
||||||
|
|
||||||
/* refresh shared memory settings which will have been zapped by the restart */
|
/* refresh shared memory settings which will have been zapped by the restart */
|
||||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||||
@@ -2181,8 +2329,7 @@ follow_new_primary(int new_primary_id)
|
|||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
|
|
||||||
create_event_notification(
|
create_event_notification(upstream_conn,
|
||||||
upstream_conn,
|
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
"repmgrd_failover_follow",
|
"repmgrd_failover_follow",
|
||||||
@@ -2356,18 +2503,20 @@ do_election(void)
|
|||||||
|
|
||||||
if (config_file_options.failover == FAILOVER_MANUAL)
|
if (config_file_options.failover == FAILOVER_MANUAL)
|
||||||
{
|
{
|
||||||
log_notice(_("this node is not configured for automatic failover so will not be considered as promotion candidate"));
|
log_notice(_("this node is not configured for automatic failover so will not be considered as promotion candidate, and will not follow the new primary"));
|
||||||
|
log_detail(_("\"failover\" is set to \"manual\" in repmgr.conf"));
|
||||||
|
log_hint(_("manually execute \"repmgr standby follow\" to have this node follow the new primary"));
|
||||||
|
|
||||||
return ELECTION_LOST;
|
return ELECTION_NOT_CANDIDATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* node priority is set to zero - don't ever become a candidate */
|
/* node priority is set to zero - don't become a candidate, and lose by default */
|
||||||
if (local_node_info.priority <= 0)
|
if (local_node_info.priority <= 0)
|
||||||
{
|
{
|
||||||
log_notice(_("this node's priority is %i so will not be considered as an automatic promotion candidate"),
|
log_notice(_("this node's priority is %i so will not be considered as an automatic promotion candidate"),
|
||||||
local_node_info.priority);
|
local_node_info.priority);
|
||||||
|
|
||||||
return ELECTION_NOT_CANDIDATE;
|
return ELECTION_LOST;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get all active nodes attached to upstream, excluding self */
|
/* get all active nodes attached to upstream, excluding self */
|
||||||
|
|||||||
@@ -26,4 +26,6 @@ void monitor_streaming_standby(void);
|
|||||||
void monitor_streaming_witness(void);
|
void monitor_streaming_witness(void);
|
||||||
void close_connections_physical(void);
|
void close_connections_physical(void);
|
||||||
|
|
||||||
|
void handle_sigint_physical(SIGNAL_ARGS);
|
||||||
|
|
||||||
#endif /* _REPMGRD_PHYSICAL_H_ */
|
#endif /* _REPMGRD_PHYSICAL_H_ */
|
||||||
|
|||||||
44
repmgrd.c
44
repmgrd.c
@@ -73,7 +73,6 @@ static void start_monitoring(void);
|
|||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
static void setup_event_handlers(void);
|
static void setup_event_handlers(void);
|
||||||
static void handle_sighup(SIGNAL_ARGS);
|
static void handle_sighup(SIGNAL_ARGS);
|
||||||
static void handle_sigint(SIGNAL_ARGS);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int calculate_elapsed(instr_time start_time);
|
int calculate_elapsed(instr_time start_time);
|
||||||
@@ -255,6 +254,8 @@ main(int argc, char **argv)
|
|||||||
strncpy(config_file_options.log_level, cli_log_level, MAXLEN);
|
strncpy(config_file_options.log_level, cli_log_level, MAXLEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_notice(_("repmgrd (repmgr %s) starting up"), REPMGR_VERSION);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* -m/--monitoring-history, if provided, will override repmgr.conf's
|
* -m/--monitoring-history, if provided, will override repmgr.conf's
|
||||||
* monitoring_history; this is for backwards compatibility as it's
|
* monitoring_history; this is for backwards compatibility as it's
|
||||||
@@ -353,10 +354,24 @@ main(int argc, char **argv)
|
|||||||
/* Retrieve record for this node from the local database */
|
/* Retrieve record for this node from the local database */
|
||||||
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info);
|
record_status = get_node_record(local_conn, config_file_options.node_id, &local_node_info);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Terminate if we can't find the local node record. This is a
|
||||||
|
* "fix-the-config" situation, not a lot else we can do.
|
||||||
|
*/
|
||||||
|
|
||||||
if (record_status != RECORD_FOUND)
|
if (record_status != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
log_error(_("no metadata record found for this node - terminating"));
|
log_error(_("no metadata record found for this node - terminating"));
|
||||||
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
|
||||||
|
switch (config_file_options.replication_type)
|
||||||
|
{
|
||||||
|
case REPLICATION_TYPE_PHYSICAL:
|
||||||
|
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
||||||
|
break;
|
||||||
|
case REPLICATION_TYPE_BDR:
|
||||||
|
log_hint(_("check that 'repmgr bdr register' was executed for this node"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
PQfinish(local_conn);
|
PQfinish(local_conn);
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
@@ -440,6 +455,7 @@ start_monitoring(void)
|
|||||||
break;
|
break;
|
||||||
case WITNESS:
|
case WITNESS:
|
||||||
monitor_streaming_witness();
|
monitor_streaming_witness();
|
||||||
|
break;
|
||||||
case BDR:
|
case BDR:
|
||||||
monitor_bdr();
|
monitor_bdr();
|
||||||
return;
|
return;
|
||||||
@@ -612,11 +628,6 @@ check_and_create_pid_file(const char *pid_file)
|
|||||||
|
|
||||||
|
|
||||||
#ifndef WIN32
|
#ifndef WIN32
|
||||||
static void
|
|
||||||
handle_sigint(SIGNAL_ARGS)
|
|
||||||
{
|
|
||||||
terminate(SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* SIGHUP: set flag to re-read config file at next convenient time */
|
/* SIGHUP: set flag to re-read config file at next convenient time */
|
||||||
static void
|
static void
|
||||||
@@ -629,8 +640,23 @@ static void
|
|||||||
setup_event_handlers(void)
|
setup_event_handlers(void)
|
||||||
{
|
{
|
||||||
pqsignal(SIGHUP, handle_sighup);
|
pqsignal(SIGHUP, handle_sighup);
|
||||||
pqsignal(SIGINT, handle_sigint);
|
|
||||||
pqsignal(SIGTERM, handle_sigint);
|
/*
|
||||||
|
* we want to be able to write a "repmgrd_shutdown" event, so delegate
|
||||||
|
* signal handling to the respective replication type handler, as it
|
||||||
|
* will know best which database connection to use
|
||||||
|
*/
|
||||||
|
switch (config_file_options.replication_type)
|
||||||
|
{
|
||||||
|
case REPLICATION_TYPE_BDR:
|
||||||
|
pqsignal(SIGINT, handle_sigint_bdr);
|
||||||
|
pqsignal(SIGTERM, handle_sigint_bdr);
|
||||||
|
break;
|
||||||
|
case REPLICATION_TYPE_PHYSICAL:
|
||||||
|
pqsignal(SIGINT, handle_sigint_physical);
|
||||||
|
pqsignal(SIGTERM, handle_sigint_physical);
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user