mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
Compare commits
17 Commits
v4.1.1
...
REL4_1_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
222f7e6080 | ||
|
|
446695e328 | ||
|
|
ec3da13e22 | ||
|
|
1488c014ff | ||
|
|
f471316504 | ||
|
|
726299f7ef | ||
|
|
7fda2a1bcf | ||
|
|
d26141b8ab | ||
|
|
4a6b5fe913 | ||
|
|
a71e644255 | ||
|
|
8646fd6004 | ||
|
|
3e1bb1a523 | ||
|
|
f5e58fc062 | ||
|
|
6b95a96f3a | ||
|
|
bd146ae9ac | ||
|
|
c7f8e48d12 | ||
|
|
322190516c |
@@ -1052,11 +1052,13 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
||||
* - repmgrd_standby_startup_timeout
|
||||
* - retry_promote_interval_secs
|
||||
*
|
||||
* non-changeable options
|
||||
* non-changeable options (repmgrd references these from the "repmgr.nodes"
|
||||
* table, not the configuration file)
|
||||
*
|
||||
* - node_id
|
||||
* - node_name
|
||||
* - data_directory
|
||||
* - location
|
||||
* - priority
|
||||
* - replication_type
|
||||
*
|
||||
@@ -1272,7 +1274,7 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
||||
config_changed = true;
|
||||
}
|
||||
|
||||
/* promote_delay */
|
||||
/* promote_delay (for testing use only; not documented) */
|
||||
if (orig_options->promote_delay != new_options.promote_delay)
|
||||
{
|
||||
orig_options->promote_delay = new_options.promote_delay;
|
||||
|
||||
18
configure
vendored
18
configure
vendored
@@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.1.1.
|
||||
# Generated by GNU Autoconf 2.69 for repmgr 4.1.2.
|
||||
#
|
||||
# Report bugs to <pgsql-bugs@postgresql.org>.
|
||||
#
|
||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='repmgr'
|
||||
PACKAGE_TARNAME='repmgr'
|
||||
PACKAGE_VERSION='4.1.1'
|
||||
PACKAGE_STRING='repmgr 4.1.1'
|
||||
PACKAGE_VERSION='4.1.2'
|
||||
PACKAGE_STRING='repmgr 4.1.2'
|
||||
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
|
||||
PACKAGE_URL='https://2ndquadrant.com/en/resources/repmgr/'
|
||||
|
||||
@@ -1178,7 +1178,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures repmgr 4.1.1 to adapt to many kinds of systems.
|
||||
\`configure' configures repmgr 4.1.2 to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@@ -1239,7 +1239,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of repmgr 4.1.1:";;
|
||||
short | recursive ) echo "Configuration of repmgr 4.1.2:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@@ -1313,7 +1313,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
repmgr configure 4.1.1
|
||||
repmgr configure 4.1.2
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@@ -1332,7 +1332,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by repmgr $as_me 4.1.1, which was
|
||||
It was created by repmgr $as_me 4.1.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@@ -2359,7 +2359,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by repmgr $as_me 4.1.1, which was
|
||||
This file was extended by repmgr $as_me 4.1.2, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@@ -2422,7 +2422,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
repmgr config.status 4.1.1
|
||||
repmgr config.status 4.1.2
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
AC_INIT([repmgr], [4.1.1], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
AC_INIT([repmgr], [4.1.2], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||
|
||||
|
||||
@@ -227,7 +227,15 @@ get_controlfile(const char *DataDir)
|
||||
|
||||
control_file_info->control_file_processed = true;
|
||||
|
||||
if (version_num >= 90500)
|
||||
if (version_num >= 110000)
|
||||
{
|
||||
ControlFileData11 *ptr = (struct ControlFileData11 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
control_file_info->state = ptr->state;
|
||||
control_file_info->checkPoint = ptr->checkPoint;
|
||||
control_file_info->data_checksum_version = ptr->data_checksum_version;
|
||||
}
|
||||
else if (version_num >= 90500)
|
||||
{
|
||||
ControlFileData95 *ptr = (struct ControlFileData95 *)ControlFileDataPtr;
|
||||
control_file_info->system_identifier = ptr->system_identifier;
|
||||
|
||||
@@ -265,6 +265,71 @@ typedef struct ControlFileData95
|
||||
|
||||
} ControlFileData95;
|
||||
|
||||
/*
|
||||
* Following field removed in 11:
|
||||
*
|
||||
* XLogRecPtr prevCheckPoint;
|
||||
*
|
||||
* In 10, following field appended *after* "data_checksum_version":
|
||||
*
|
||||
* char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN];
|
||||
*
|
||||
* (but we don't care about that)
|
||||
*/
|
||||
|
||||
typedef struct ControlFileData11
|
||||
{
|
||||
uint64 system_identifier;
|
||||
|
||||
uint32 pg_control_version; /* PG_CONTROL_VERSION */
|
||||
uint32 catalog_version_no; /* see catversion.h */
|
||||
|
||||
DBState state; /* see enum above */
|
||||
pg_time_t time; /* time stamp of last pg_control update */
|
||||
XLogRecPtr checkPoint; /* last check point record ptr */
|
||||
|
||||
CheckPoint95 checkPointCopy; /* copy of last check point record */
|
||||
|
||||
XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
|
||||
|
||||
XLogRecPtr minRecoveryPoint;
|
||||
TimeLineID minRecoveryPointTLI;
|
||||
XLogRecPtr backupStartPoint;
|
||||
XLogRecPtr backupEndPoint;
|
||||
bool backupEndRequired;
|
||||
|
||||
int wal_level;
|
||||
bool wal_log_hints;
|
||||
int MaxConnections;
|
||||
int max_worker_processes;
|
||||
int max_prepared_xacts;
|
||||
int max_locks_per_xact;
|
||||
bool track_commit_timestamp;
|
||||
|
||||
uint32 maxAlign; /* alignment requirement for tuples */
|
||||
double floatFormat; /* constant 1234567.0 */
|
||||
|
||||
uint32 blcksz; /* data block size for this DB */
|
||||
uint32 relseg_size; /* blocks per segment of large relation */
|
||||
|
||||
uint32 xlog_blcksz; /* block size within WAL files */
|
||||
uint32 xlog_seg_size; /* size of each WAL segment */
|
||||
|
||||
uint32 nameDataLen; /* catalog name field width */
|
||||
uint32 indexMaxKeys; /* max number of columns in an index */
|
||||
|
||||
uint32 toast_max_chunk_size; /* chunk size in TOAST tables */
|
||||
uint32 loblksize; /* chunk size in pg_largeobject */
|
||||
|
||||
bool enableIntTimes; /* int64 storage enabled? */
|
||||
|
||||
bool float4ByVal; /* float4 pass-by-value? */
|
||||
bool float8ByVal; /* float8, int8, etc pass-by-value? */
|
||||
|
||||
uint32 data_checksum_version;
|
||||
|
||||
} ControlFileData11;
|
||||
|
||||
|
||||
|
||||
extern DBState get_db_state(const char *data_directory);
|
||||
|
||||
@@ -33,10 +33,9 @@
|
||||
<para>
|
||||
<itemizedlist>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby switchover --dry-run</link></command>
|
||||
<command><link linkend="repmgr-standby-switchover">repmgr standby switchover --dry-run</link></command>
|
||||
no longer copies external configuration files to test they can be copied; this avoids making
|
||||
any changes to the target system. (GitHub #491).
|
||||
</para>
|
||||
@@ -45,7 +44,7 @@
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-cluster-cleanup">repmgr cluster cleanup</link></command>:
|
||||
add <literal>cluster_cleanup</literal> event. (GitHub #492)
|
||||
add <literal>cluster_cleanup</literal> event. (GitHub #492).
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
@@ -17,15 +17,15 @@
|
||||
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>.
|
||||
</para>
|
||||
<para>
|
||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> to control the PostgreSQL
|
||||
By default, &repmgr; will use PostgreSQL's <command>pg_ctl</command> utility to control the PostgreSQL
|
||||
server. However this can lead to various problems, particularly when PostgreSQL has been
|
||||
installed from packages, and expecially so if <application>systemd</application> is in use.
|
||||
installed from packages, and especially so if <application>systemd</application> is in use.
|
||||
</para>
|
||||
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
|
||||
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
||||
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
||||
</para>
|
||||
@@ -99,7 +99,7 @@
|
||||
Defaults:postgres !requiretty
|
||||
postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
|
||||
/usr/bin/systemctl start postgresql-9.6, \
|
||||
/usr/bin/systemctl restart postgresql-9.6 \
|
||||
/usr/bin/systemctl restart postgresql-9.6, \
|
||||
/usr/bin/systemctl reload postgresql-9.6</programlisting>
|
||||
</para>
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
<title>2ndQuadrant public RPM yum repository</title>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.1/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
|
||||
@@ -81,36 +81,56 @@
|
||||
|
||||
<refsect1>
|
||||
<title>Options</title>
|
||||
<para>
|
||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
<programlisting>
|
||||
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--csv</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
|
||||
outputs the replication cluster's status in a simple CSV format, suitable for
|
||||
parsing by scripts:
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.conf cluster show --csv
|
||||
1,-1,-1
|
||||
2,0,0
|
||||
3,0,1</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
</para>
|
||||
<para>
|
||||
The columns have the following meanings:
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
node ID
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
availability (0 = available, -1 = unavailable)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--verbose</option></term>
|
||||
<listitem>
|
||||
<simpara>
|
||||
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
|
||||
</simpara>
|
||||
<para>
|
||||
Display the full text of any database connection error messages
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
|
||||
</refsect1>
|
||||
|
||||
|
||||
|
||||
@@ -67,10 +67,10 @@
|
||||
<term><option>--force-rewind[=/path/to/pg_rewind]</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Execute <application>pg_rewind</application> if necessary.
|
||||
Execute <application>pg_rewind</application>.
|
||||
</para>
|
||||
<para>
|
||||
It is only necessary to provide the <application>pg_rewind</application>
|
||||
It is only necessary to provide the <application>pg_rewind</application> path
|
||||
if using PostgreSQL 9.3 or 9.4, and <application>pg_rewind</application>
|
||||
is not installed in the PostgreSQL <filename>bin</filename> directory.
|
||||
</para>
|
||||
@@ -193,7 +193,7 @@
|
||||
</note>
|
||||
|
||||
<para>
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command> if required,
|
||||
To have <command>repmgr node rejoin</command> use <command>pg_rewind</command>,
|
||||
pass the command line option <literal>--force-rewind</literal>, which will tell &repmgr;
|
||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||
</para>
|
||||
@@ -226,6 +226,15 @@
|
||||
INFO: pg_rewind would now be executed
|
||||
DETAIL: pg_rewind command is:
|
||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node1 dbname=repmgr user=repmgr'</programlisting>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
||||
this checks the prerequisites for using <application>pg_rewind</application>, but cannot
|
||||
predict the outcome of actually executing <application>pg_rewind</application>.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<programlisting>
|
||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node1 dbname=repmgr user=repmgr' \
|
||||
--force-rewind --config-files=postgresql.local.conf,postgresql.conf --verbose
|
||||
|
||||
@@ -25,7 +25,13 @@
|
||||
<para>
|
||||
This is the official documentation of &repmgr; &repmgrversion; for
|
||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
||||
It describes the functionality supported by the current version of &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; is being continually developed and we strongly recommend using the
|
||||
latest version. Please check the
|
||||
<ulink url="https://repmgr.org/">repmgr website</ulink> for details
|
||||
about the current &repmgr; version as well as the
|
||||
<ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -34,24 +34,6 @@
|
||||
the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To apply configuration file changes to a running <application>repmgrd</application>
|
||||
daemon, execute the operating system's r<application>repmgrd</application> service reload command
|
||||
(see <xref linkend="appendix-packages"> for examples),
|
||||
or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
|
||||
<command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Check the <application>repmgrd</application> log to see what changes were
|
||||
applied, or if any issues were encountered when reloading the configuration.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
Note that only a subset of configuration file parameters can be changed on a
|
||||
running <application>repmgrd</application> daemon.
|
||||
</para>
|
||||
|
||||
|
||||
<sect2 id="repmgrd-automatic-failover-configuration">
|
||||
<title>automatic failover configuration</title>
|
||||
@@ -167,6 +149,203 @@
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2 id="repmgrd-reloading-configuration" xreflabel="reloading repmgrd configuration">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>applying configuration changes</secondary>
|
||||
</indexterm>
|
||||
<title>Applying configuration changes to repmgrd</title>
|
||||
<para>
|
||||
To apply configuration file changes to a running <application>repmgrd</application>
|
||||
daemon, execute the operating system's <application>repmgrd</application> service reload command
|
||||
(see <xref linkend="appendix-packages"> for examples),
|
||||
or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
|
||||
<command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
Check the <application>repmgrd</application> log to see what changes were
|
||||
applied, or if any issues were encountered when reloading the configuration.
|
||||
</para>
|
||||
</tip>
|
||||
<para>
|
||||
Note that only the following subset of configuration file parameters can be changed on a
|
||||
running <application>repmgrd</application> daemon:
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>async_query_timeout</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>bdr_local_monitoring_only</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>bdr_recovery_timeout</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>conninfo</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>degraded_monitoring_timeout</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>event_notification_command</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>event_notifications</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>failover</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>follow_command</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>log_facility</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>log_file</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>log_level</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>log_status_interval</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>monitor_interval_secs</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>monitoring_history</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>primary_notification_timeout</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>promote_command</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>reconnect_attempts</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>reconnect_interval</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>repmgrd_standby_startup_timeout</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<para>
|
||||
The following set of configuration file parameters must be updated via
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
|
||||
as they require changes to the <literal>repmgr.nodes</literal> table so they are visible to
|
||||
all nodes in the replication cluster:
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>node_id</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>node_name</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>data_directory</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>location</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<varname>priority</varname>
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
After executing <command><link linkend="repmgr-standby-register">repmgr standby register --force</link></command>,
|
||||
<application>repmgrd</application> <emphasis>must</emphasis> be restarted for the changes to take effect.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-daemon">
|
||||
|
||||
@@ -84,6 +84,7 @@ do_cluster_show(void)
|
||||
ItemList warnings = {NULL, NULL};
|
||||
bool success = false;
|
||||
bool error_found = false;
|
||||
bool connection_error_found = false;
|
||||
|
||||
/* Connect to local database to obtain cluster connection data */
|
||||
log_verbose(LOG_INFO, _("connecting to database"));
|
||||
@@ -141,14 +142,26 @@ do_cluster_show(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
char error[MAXLEN];
|
||||
|
||||
strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
|
||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
||||
cell->node_info->recovery_type = RECTYPE_UNKNOWN;
|
||||
item_list_append_format(&warnings,
|
||||
"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
|
||||
cell->node_info->node_name, cell->node_info->node_id, trim(error));
|
||||
|
||||
connection_error_found = true;
|
||||
|
||||
if (runtime_options.verbose)
|
||||
{
|
||||
char error[MAXLEN];
|
||||
|
||||
strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
|
||||
item_list_append_format(&warnings,
|
||||
"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
|
||||
cell->node_info->node_name, cell->node_info->node_id, trim(error));
|
||||
}
|
||||
else
|
||||
{
|
||||
item_list_append_format(&warnings,
|
||||
"unable to connect to node \"%s\" (ID: %i)",
|
||||
cell->node_info->node_name, cell->node_info->node_id);
|
||||
}
|
||||
}
|
||||
|
||||
initPQExpBuffer(&details);
|
||||
@@ -437,6 +450,11 @@ do_cluster_show(void)
|
||||
{
|
||||
printf(_(" - %s\n"), cell->string);
|
||||
}
|
||||
|
||||
if (runtime_options.verbose == false && connection_error_found == true)
|
||||
{
|
||||
log_hint(_("execute with --verbose option to see connection error messages"));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
4
repmgr.c
4
repmgr.c
@@ -416,9 +416,9 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
|
||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
||||
|
||||
shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
}
|
||||
|
||||
LWLockRelease(shared_state->lock);
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
||||
@@ -161,7 +161,7 @@
|
||||
# Examples:
|
||||
#
|
||||
# pg_ctl_options='-s'
|
||||
# pg_basebackup_options='--label=repmgr_backup
|
||||
# pg_basebackup_options='--label=repmgr_backup'
|
||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||
# ssh_options=-o "StrictHostKeyChecking no"
|
||||
|
||||
@@ -281,9 +281,9 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# a value of zero prevents the node being promoted to primary
|
||||
# (default: 100)
|
||||
|
||||
#reconnect_attempts=6 # Number attempts which will be made to reconnect to an unreachable
|
||||
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
||||
# primary (or other upstream node)
|
||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||
# primary (or other upstream node)
|
||||
#promote_command= # command repmgrd executes when promoting a new primary; use something like:
|
||||
#
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.1.1"
|
||||
#define REPMGR_VERSION "4.1.2"
|
||||
|
||||
@@ -268,7 +268,12 @@ monitor_streaming_primary(void)
|
||||
* TODO: cache node list here, refresh at `node_list_refresh_interval`
|
||||
* also return reason for unavailability so we can log it
|
||||
*/
|
||||
if (is_server_available(local_node_info.conninfo) == false)
|
||||
|
||||
(void) connection_ping(local_conn);
|
||||
|
||||
check_connection(&local_node_info, &local_conn);
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
|
||||
/* local node is down, we were expecting it to be up */
|
||||
@@ -308,6 +313,7 @@ monitor_streaming_primary(void)
|
||||
if (local_node_info.node_status == NODE_STATUS_UP)
|
||||
{
|
||||
int local_node_unreachable_elapsed = calculate_elapsed(local_node_unreachable_start);
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
@@ -324,6 +330,17 @@ monitor_streaming_primary(void)
|
||||
event_details.data);
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
/*
|
||||
* If the local node was restarted, we'll need to reinitialise values
|
||||
* stored in shared memory.
|
||||
*/
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
}
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
@@ -991,6 +1008,13 @@ monitor_streaming_standby(void)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* skip witness node - we can't possibly "follow" that */
|
||||
|
||||
if (cell->node_info->type == WITNESS)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||
|
||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||
@@ -1013,6 +1037,7 @@ monitor_streaming_standby(void)
|
||||
follow_new_primary(follow_node_id);
|
||||
}
|
||||
}
|
||||
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
}
|
||||
}
|
||||
@@ -1136,8 +1161,11 @@ loop:
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we've reconnected to the local node after an outage */
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
|
||||
@@ -1153,19 +1181,29 @@ loop:
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
|
||||
log_warning("%s", event_details.data)
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"standby_recovery",
|
||||
true,
|
||||
event_details.data);
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"standby_recovery",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the local node was restarted, we'll need to reinitialise values
|
||||
* stored in shared memory.
|
||||
*/
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1201,7 +1239,7 @@ monitor_streaming_witness(void)
|
||||
/*
|
||||
* At this point we can't trust the local copy of "repmgr.nodes", as
|
||||
* it may not have been updated. We'll scan the cluster for the current
|
||||
* primary and refresh the copy from that before proceeding further.
|
||||
* primary and refresh the copy from that before proceeding further.
|
||||
*/
|
||||
primary_conn = get_primary_connection_quiet(local_conn, &primary_node_id, NULL);
|
||||
|
||||
@@ -1437,6 +1475,105 @@ monitor_streaming_witness(void)
|
||||
}
|
||||
loop:
|
||||
|
||||
/*
|
||||
* handle local node failure
|
||||
*
|
||||
* currently we'll just check the connection, and try to reconnect
|
||||
*
|
||||
* TODO: add timeout, after which we run in degraded state
|
||||
*/
|
||||
|
||||
(void) connection_ping(local_conn);
|
||||
|
||||
check_connection(&local_node_info, &local_conn);
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
if (local_node_info.active == true)
|
||||
{
|
||||
bool success = true;
|
||||
PQExpBufferData event_details;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
local_node_info.active = false;
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("unable to connect to local node \"%s\" (ID: %i), marking inactive"),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
if (update_node_record_set_active(primary_conn, local_node_info.node_id, false) == false)
|
||||
{
|
||||
success = false;
|
||||
log_warning(_("unable to mark node \"%s\" (ID: %i) as inactive"),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
}
|
||||
}
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"standby_failure",
|
||||
success,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we've reconnected to the local node after an outage */
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||
{
|
||||
if (update_node_record_set_active(primary_conn, local_node_info.node_id, true) == true)
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
local_node_info.active = true;
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("reconnected to local node \"%s\" (ID: %i), marking active"),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(primary_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"standby_recovery",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the local node was restarted, we'll need to reinitialise values
|
||||
* stored in shared memory.
|
||||
*/
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(local_conn);
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* refresh repmgr.nodes after "witness_sync_interval" seconds */
|
||||
|
||||
{
|
||||
@@ -1480,6 +1617,7 @@ loop:
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (got_SIGHUP)
|
||||
{
|
||||
handle_sighup(&local_conn, WITNESS);
|
||||
@@ -2256,6 +2394,8 @@ follow_new_primary(int new_primary_id)
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
bool new_primary_ok = false;
|
||||
|
||||
log_verbose(LOG_DEBUG, "follow_new_primary(): new primary id is %i", new_primary_id);
|
||||
|
||||
record_status = get_node_record(local_conn, new_primary_id, &new_primary);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
@@ -2934,9 +3074,18 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
||||
}
|
||||
else
|
||||
{
|
||||
int stored_local_node_id = UNKNOWN_NODE_ID;
|
||||
|
||||
log_info(_("reconnected to node \"%s\" (ID: %i)"),
|
||||
node_info->node_name,
|
||||
node_info->node_id);
|
||||
|
||||
stored_local_node_id = repmgrd_get_local_node_id(*conn);
|
||||
if (stored_local_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
repmgrd_set_local_node_id(*conn, config_file_options.node_id);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user