mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
afdaf9be66 | ||
|
|
8067924c3e | ||
|
|
e94a6eefde | ||
|
|
69d7b6f7eb | ||
|
|
8ec3b2a536 | ||
|
|
68a9745e7e | ||
|
|
20ce53e2d2 | ||
|
|
638a119c85 | ||
|
|
053863cdd0 | ||
|
|
009cc0480c | ||
|
|
63bdc19132 | ||
|
|
fbd389d0b3 | ||
|
|
4aef4ea11e | ||
|
|
0ffaff75df | ||
|
|
c54bb73fb2 | ||
|
|
28ea2e48de | ||
|
|
41274f5525 | ||
|
|
edceb32ccb | ||
|
|
3dba8336e9 | ||
|
|
97d0cee259 | ||
|
|
2dfe1d18e9 | ||
|
|
55bb93bd3f | ||
|
|
4c49954cd4 | ||
|
|
a880b6ce16 | ||
|
|
c51a2283dd | ||
|
|
717828e73e | ||
|
|
c7477d7a9c | ||
|
|
1db8d3904f | ||
|
|
362f478d55 |
19
HISTORY
19
HISTORY
@@ -1,3 +1,22 @@
|
||||
4.0.6 2018-06-14
|
||||
repmgr: (witness register) prevent registration of a witness server with the
|
||||
same name as an existing node (Ian)
|
||||
repmgr: (standby follow) check node has actually connected to new primary
|
||||
before reporting success; GitHub #444 (Ian)
|
||||
repmgr: (standby clone) improve handling of external configuration file copying,
|
||||
including consideration in --dry-run check; GitHub #443 (Ian)
|
||||
repmgr: (standby clone) don't require presence of "user" parameter in
|
||||
conninfo string; GitHub #437 (Ian)
|
||||
repmgr: (standby clone) improve documentation of --recovery-conf-only
|
||||
mode; GitHub #438 (Ian)
|
||||
repmgr: (node rejoin) fix bug when parsing --config-files parameter;
|
||||
GitHub #442 (Ian)
|
||||
repmgr: when using --dry-run, force log level to INFO to ensure output
|
||||
will always be displayed; GitHub #441 (Ian)
|
||||
repmgr: (cluster matrix/crosscheck) return non-zero exit code if node
|
||||
connection issues detected; GitHub #447 (Ian)
|
||||
repmgrd: ensure local node is counted as quorum member; GitHub #439 (Ian)
|
||||
|
||||
4.0.5 2018-05-02
|
||||
repmgr: poll demoted primary after restart as a standby during a
|
||||
switchover operation; GitHub #408 (Ian)
|
||||
|
||||
20
configfile.c
20
configfile.c
@@ -319,13 +319,20 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->use_primary_conninfo_password = false;
|
||||
memset(options->passfile, 0, sizeof(options->passfile));
|
||||
|
||||
/*-----------------------
|
||||
/*-------------------------
|
||||
* standby promote settings
|
||||
*------------------------
|
||||
*-------------------------
|
||||
*/
|
||||
options->promote_check_timeout = DEFAULT_PROMOTE_CHECK_TIMEOUT;
|
||||
options->promote_check_interval = DEFAULT_PROMOTE_CHECK_INTERVAL;
|
||||
|
||||
/*------------------------
|
||||
* standby follow settings
|
||||
*------------------------
|
||||
*/
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
options->standby_follow_timeout = DEFAULT_STANDBY_FOLLOW_TIMEOUT;
|
||||
|
||||
/*-----------------
|
||||
* repmgrd settings
|
||||
*-----------------
|
||||
@@ -345,7 +352,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->degraded_monitoring_timeout = -1;
|
||||
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
|
||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||
|
||||
/*-------------
|
||||
@@ -527,6 +533,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
else if (strcmp(name, "promote_check_interval") == 0)
|
||||
options->promote_check_interval = repmgr_atoi(value, name, error_list, 1);
|
||||
|
||||
/* standby follow settings */
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_follow_timeout") == 0)
|
||||
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* node check settings */
|
||||
else if (strcmp(name, "archive_ready_warning") == 0)
|
||||
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
|
||||
@@ -576,8 +588,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->async_query_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_notification_timeout") == 0)
|
||||
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
|
||||
@@ -98,6 +98,10 @@ typedef struct
|
||||
int promote_check_timeout;
|
||||
int promote_check_interval;
|
||||
|
||||
/* standby follow settings */
|
||||
int primary_follow_timeout;
|
||||
int standby_follow_timeout;
|
||||
|
||||
/* node check settings */
|
||||
int archive_ready_warning;
|
||||
int archive_ready_critical;
|
||||
@@ -120,7 +124,6 @@ typedef struct
|
||||
int degraded_monitoring_timeout;
|
||||
int async_query_timeout;
|
||||
int primary_notification_timeout;
|
||||
int primary_follow_timeout;
|
||||
int standby_reconnect_timeout;
|
||||
|
||||
/* BDR settings */
|
||||
@@ -167,6 +170,9 @@ typedef struct
|
||||
false, "", "", { NULL, NULL }, "", false, "", false, "", \
|
||||
/* standby promote settings */ \
|
||||
DEFAULT_PROMOTE_CHECK_TIMEOUT, DEFAULT_PROMOTE_CHECK_INTERVAL, \
|
||||
/* standby follow settings */ \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
|
||||
/* node check settings */ \
|
||||
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
||||
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
||||
@@ -180,7 +186,6 @@ typedef struct
|
||||
false, -1, \
|
||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||
/* BDR settings */ \
|
||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
AC_INIT([repmgr], [4.0.5], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
AC_INIT([repmgr], [4.0.6], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||
|
||||
|
||||
36
dbutils.c
36
dbutils.c
@@ -23,6 +23,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include "repmgr.h"
|
||||
#include "dbutils.h"
|
||||
@@ -370,6 +371,37 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get a default conninfo value for the provided parameter, and copy
|
||||
* it to the 'output' buffer.
|
||||
*
|
||||
* Returns true on success, or false on failure (provided keyword not found).
|
||||
*
|
||||
*/
|
||||
bool
|
||||
get_conninfo_default_value(const char *param, char *output, int maxlen)
|
||||
{
|
||||
PQconninfoOption *defs = NULL;
|
||||
PQconninfoOption *def = NULL;
|
||||
bool found = false;
|
||||
|
||||
defs = PQconndefaults();
|
||||
|
||||
for (def = defs; def->keyword; def++)
|
||||
{
|
||||
if (strncmp(def->keyword, param, maxlen) == 0)
|
||||
{
|
||||
strncpy(output, def->val, maxlen);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
PQconninfoFree(defs);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults)
|
||||
{
|
||||
@@ -1733,7 +1765,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
||||
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
||||
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
||||
node_info->active = atobool(PQgetvalue(res, row, 9));
|
||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXLEN);
|
||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXPGPATH);
|
||||
|
||||
/* This won't normally be set */
|
||||
strncpy(node_info->upstream_node_name, PQgetvalue(res, row, 11), MAXLEN);
|
||||
@@ -3110,6 +3142,8 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
char event_timestamp[MAXLEN] = "";
|
||||
bool success = true;
|
||||
|
||||
log_verbose(LOG_DEBUG, "_create_event(): event is \"%s\" for node %i", event, node_id);
|
||||
|
||||
/*
|
||||
* Only attempt to write a record if a connection handle was provided.
|
||||
* Also check that the repmgr schema has been properly initialised - if
|
||||
|
||||
@@ -357,7 +357,7 @@ void close_connection(PGconn **conn);
|
||||
|
||||
/* conninfo manipulation functions */
|
||||
bool get_conninfo_value(const char *conninfo, const char *keyword, char *output);
|
||||
|
||||
bool get_conninfo_default_value(const char *param, char *output, int maxlen);
|
||||
void initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults);
|
||||
void free_conninfo_params(t_conninfo_param_list *param_list);
|
||||
void copy_conninfo_params(t_conninfo_param_list *dest_list, t_conninfo_param_list *source_list);
|
||||
@@ -369,6 +369,7 @@ bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *par
|
||||
char *param_list_to_string(t_conninfo_param_list *param_list);
|
||||
bool has_passfile(void);
|
||||
|
||||
|
||||
/* transaction functions */
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
|
||||
@@ -41,18 +41,19 @@
|
||||
<title>CentOS repositories</title>
|
||||
|
||||
<para>
|
||||
&repmgr; packages are available from the 2ndQuadrant repository, and also the PostgreSQL
|
||||
community repository. The 2ndQuadrant repository is updated immediately after each
|
||||
&repmgr; packages are available from the public 2ndQuadrant repository, and also the
|
||||
PostgreSQL community repository. The 2ndQuadrant repository is updated immediately
|
||||
after each
|
||||
&repmgr; release.
|
||||
</para>
|
||||
|
||||
<table id="centos-2ndquadrant-repository">
|
||||
<title>2ndQuadrant repository</title>
|
||||
<title>2ndQuadrant public repository</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink></entry>
|
||||
<entry><ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
|
||||
@@ -15,6 +15,113 @@
|
||||
See also: <xref linkend="upgrading-repmgr">
|
||||
</para>
|
||||
|
||||
<sect1 id="release-4.0.6">
|
||||
<title>Release 4.0.6</title>
|
||||
<para><emphasis>June 14, 2018</emphasis></para>
|
||||
<para>
|
||||
&repmgr; 4.0.6 contains a number of bug fixes and usability enhancements.
|
||||
</para>
|
||||
<para>
|
||||
We recommend upgrading to this version as soon as possible.
|
||||
This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.5;
|
||||
<application>repmgrd</application> (if running) should be restarted. See <xref linkend="upgrading-repmgr">
|
||||
for more details.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command> and
|
||||
<command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>:
|
||||
return non-zero exit code if node connection issues detected (GitHub #447)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Improve handling of external configuration file copying, including consideration in
|
||||
<option>--dry-run</option> check
|
||||
(GitHub #443)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
When using <option>--dry-run</option>, force log level to <literal>INFO</literal>
|
||||
to ensure output will always be displayed
|
||||
(GitHub #441)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-witness-register">repmgr witness register</link></command>:
|
||||
prevent registration of a witness server with the same name as an existing node.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>:
|
||||
check node has actually connected to new primary before reporting success
|
||||
(GitHub #444)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Don't require presence of <varname>user</varname> parameter in conninfo string
|
||||
(GitHub #437)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Improve documentation of <option>--recovery-conf-only</option> mode
|
||||
(GitHub #438)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
|
||||
Fix bug when parsing <option>--config-files</option> parameter
|
||||
(GitHub #442)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application>: ensure local node is counted as quorum member
|
||||
(GitHub #439)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="release-4.0.5">
|
||||
<title>Release 4.0.5</title>
|
||||
<para><emphasis>Wed May 2, 2018</emphasis></para>
|
||||
@@ -24,6 +131,7 @@
|
||||
generation and (in <application>repmgrd</application>) handling of various
|
||||
corner-case situations, as well as a number of bug fixes.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
|
||||
@@ -5,26 +5,27 @@
|
||||
system.
|
||||
</para>
|
||||
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, Fedora and CentOS">
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, CentOS and Fedora">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>on Red Hat/CentOS/Fedora etc.</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>RedHat/Fedora/CentOS</title>
|
||||
<title>RedHat/CentOS/Fedora</title>
|
||||
<para>
|
||||
RPM packages for &repmgr; are available via Yum through
|
||||
&repmgr; RPM packages for RedHat/CentOS variants and Fedora are available from the
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||
<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>; see following
|
||||
section for details.
|
||||
</para>
|
||||
<para>
|
||||
RPM packages for &repmgr; are also available via Yum through
|
||||
the PostgreSQL Global Development Group RPM repository
|
||||
(<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
|
||||
Follow the instructions for your distribution (RedHat, CentOS,
|
||||
Fedora, etc.) and architecture as detailed there.
|
||||
</para>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> also provides its
|
||||
own RPM packages which are made available
|
||||
at the same time as each &repmgr; release, as it can take some days for
|
||||
them to become available via the main PGDG repository. See following section for details:
|
||||
Fedora, etc.) and architecture as detailed there. Note that it can take some days
|
||||
for new &repmgr; packages to become available via this repository.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
@@ -43,59 +44,68 @@
|
||||
|
||||
|
||||
<sect3 id="installation-packages-redhat-2ndq">
|
||||
<title>2ndQuadrant repmgr yum repository</title>
|
||||
<title>2ndQuadrant public RPM yum repository</title>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> previously provided a dedicated
|
||||
&repmgr; repository at
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink>.
|
||||
This repository will be deprecated in a future release as it is now replaced by
|
||||
the <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>
|
||||
documented below.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="http://repmgr.org/release-notes-3.1.3.html">repmgr 3.1.3</ulink>,
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
repository for &repmgr; releases. This repository complements the main
|
||||
<ulink url="https://yum.postgresql.org/repopackages.php">PGDG community repository</ulink>,
|
||||
but enables repmgr users to access the latest &repmgr; packages before they are
|
||||
available via the PGDG repository, which can take several days to be updated following
|
||||
a fresh &repmgr; release.
|
||||
</para>
|
||||
<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://rpm.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Import the repository public key (optional but recommended):
|
||||
<programlisting>
|
||||
rpm --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||
<ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository RPM for your distribution (this enables the 2ndQuadrant
|
||||
repository as a source of repmgr packages):
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>Fedora:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>RHEL, CentOS etc:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
e.g.:
|
||||
<programlisting>
|
||||
$ yum install http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
Install the repository RPM for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||
</para>
|
||||
<para>
|
||||
For example, for PostgreSQL 10 on CentOS, execute:
|
||||
<programlisting>
|
||||
sudo yum install https://rpm.2ndquadrant.com/site/content/2ndquadrant-repo-10-1-1.el7.noarch.rpm
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Verify that the repository is installed with:
|
||||
<programlisting>
|
||||
sudo yum repolist</programlisting>
|
||||
The output should contain two entries like this:
|
||||
<programlisting>
|
||||
2ndquadrant-repo-10/7/x86_64 2ndQuadrant packages for PG10 for rhel 7 - x86_64 1
|
||||
2ndquadrant-repo-10-debug/7/x86_64 2ndQuadrant packages for PG10 for rhel 7 - x86_64 - Debug 1</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repmgr version appropriate for your PostgreSQL version (e.g. <literal>repmgr96</literal>), e.g.:
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
<programlisting>
|
||||
$ yum install repmgr96</programlisting>
|
||||
$ yum install repmgr10</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
@@ -105,13 +115,13 @@
|
||||
<emphasis>Compatibility with PGDG Repositories</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
The 2ndQuadrant &repmgr; yum repository uses exactly the same package definitions as the
|
||||
main PGDG repository and is effectively a selective mirror for &repmgr; packages only.
|
||||
The 2ndQuadrant &repmgr; yum repository packages use the same definitions and file system layout as the
|
||||
main PGDG repository.
|
||||
</para>
|
||||
<para>
|
||||
Normally yum should prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
Normally <application>yum</application> will prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
</para>
|
||||
<para>
|
||||
To ensure the 2ndQuadrant repository is always prioritised, install <literal>yum-plugin-priorities</literal>
|
||||
@@ -125,30 +135,23 @@
|
||||
To install a specific package version, execute <command>yum --showduplicates list</command>
|
||||
for the package in question:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum --showduplicates list repmgr96
|
||||
[root@localhost ~]# yum --showduplicates list repmgr10
|
||||
Loaded plugins: fastestmirror
|
||||
Loading mirror speeds from cached hostfile
|
||||
* base: ftp.iij.ad.jp
|
||||
* extras: ftp.iij.ad.jp
|
||||
* updates: ftp.iij.ad.jp
|
||||
Available Packages
|
||||
repmgr96.x86_64 3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.2.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.rhel6 pgdg96
|
||||
repmgr96.x86_64 4.0.0-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 4.0.0-1.rhel6 pgdg96</programlisting>
|
||||
repmgr10.x86_64 4.0.3-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.4-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.5-1.el7 2ndquadrant-repo-10</programlisting>
|
||||
then append the appropriate version number to the package name with a hyphen, e.g.:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum install repmgr96-3.3.2-1.el6</programlisting>
|
||||
[root@localhost ~]# yum install repmgr10-4.0.3-1.rhel7</programlisting>
|
||||
</para>
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
||||
|
||||
<indexterm>
|
||||
@@ -168,6 +171,79 @@
|
||||
see the appendix section <xref linkend="packages-debian-ubuntu">.
|
||||
</para>
|
||||
|
||||
<sect3 id="installation-packages-debian-ubuntu-2ndq">
|
||||
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
||||
<ulink url="https://apt.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://apt.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If not already present, install the <application>apt-transport-https</application> package:
|
||||
<programlisting>
|
||||
sudo apt-get install apt-transport-https</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Create <filename>/etc/apt/sources.list.d/2ndquadrant.list</filename> as follows:
|
||||
<programlisting>
|
||||
sudo sh -c 'echo "deb https://apt.2ndquadrant.com/ $(lsb_release -cs)-2ndquadrant main" > /etc/apt/sources.list.d/2ndquadrant.list'</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the 2ndQuadrant <ulink url="https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc">repository key</ulink>:
|
||||
<programlisting>
|
||||
sudo apt-get install curl ca-certificates
|
||||
curl https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc | sudo apt-key add -</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Update the package list:
|
||||
<programlisting>
|
||||
sudo apt-get update</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
<programlisting>
|
||||
$ apt-get install postgresql-10-repmgr</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
||||
a period between major and minor version numbers, e.g.
|
||||
<literal>postgresql-9.6-repmgr</literal>.
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
@@ -80,7 +80,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There are also tags for each &repmgr; release, e.g. <filename>REL4_0_STABLE</filename>.
|
||||
There are also tags for each &repmgr; release, e.g. <filename>4.0.5</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -38,5 +38,34 @@
|
||||
and therefore determine the state of outbound connections from that node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
The following exit codes can be emitted by <command>repmgr cluster crosscheck</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_CLUSTER_CHECK (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be reached.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -97,5 +97,35 @@
|
||||
useful result.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
The following exit codes can be emitted by <command>repmgr cluster matrix</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_CLUSTER_CHECK (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be reached.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -124,7 +124,7 @@
|
||||
<para>
|
||||
We recommend using <ulink url="https://www.pgbarman.org/">Barman</ulink> to manage
|
||||
WAL file archiving. For more details on combining &repmgr; and <application>Barman</application>,
|
||||
in particular using <varname>restore_command</varname> to configure Barman as a backu source of
|
||||
in particular using <varname>restore_command</varname> to configure Barman as a backup source of
|
||||
WAL files, see <xref linkend="cloning-from-barman">.
|
||||
</para>
|
||||
</note>
|
||||
@@ -177,12 +177,13 @@
|
||||
<title>Using a standby cloned by another method</title>
|
||||
<para>
|
||||
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
||||
<command>barman recover</command> command).
|
||||
<command><ulink url="http://docs.pgbarman.org/release/2.4/#recover">barman recover</ulink></command> command).
|
||||
</para>
|
||||
<para>
|
||||
To integrate the standby as a &repmgr; node, ensure the <filename>repmgr.conf</filename>
|
||||
file is created for the node, then execute the command
|
||||
<command>repmgr standby clone --recovery-conf-only</command>.
|
||||
file is created for the node, and that it has been registered using
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>.
|
||||
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
||||
This will create the <filename>recovery.conf</filename> file needed to attach
|
||||
the node to its upstream, and will also create a replication slot on the
|
||||
upstream node if required.
|
||||
|
||||
@@ -26,10 +26,18 @@
|
||||
running. It can only be used to attach an active standby to the current primary node
|
||||
(and not to another standby).
|
||||
</para>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, see
|
||||
<xref linkend="repmgr-node-rejoin">
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, use
|
||||
<xref linkend="repmgr-node-rejoin">.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
<command>repmgr standby follow</command> will wait up to
|
||||
<varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
|
||||
to verify the standby has actually connected to the new primary.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
@@ -92,7 +100,7 @@
|
||||
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
<para>
|
||||
If provided, &repmgr; will subsitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
@@ -173,7 +173,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If provided, &repmgr; will subsitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
primary node, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
@@ -199,7 +199,7 @@
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
Following exit codes can be emitted by <literal>repmgr standby switchover</literal>:
|
||||
The following exit codes can be emitted by <command>repmgr standby switchover</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
@@ -227,7 +227,7 @@
|
||||
<para>
|
||||
The switchover was executed but a problem was encountered.
|
||||
Typically this means the former primary could not be reattached
|
||||
as a standby.
|
||||
as a standby. Check preceding log messages for more information.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@@ -1 +1 @@
|
||||
<!ENTITY repmgrversion "4.0.5">
|
||||
<!ENTITY repmgrversion "4.0.6">
|
||||
|
||||
@@ -46,5 +46,6 @@
|
||||
#define ERR_SWITCHOVER_INCOMPLETE 22
|
||||
#define ERR_FOLLOW_FAIL 23
|
||||
#define ERR_REJOIN_FAIL 24
|
||||
#define ERR_CLUSTER_CHECK 25
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
7
log.c
7
log.c
@@ -329,6 +329,13 @@ logger_set_terse(void)
|
||||
}
|
||||
|
||||
|
||||
/*
 * logger_set_min_level()
 *
 * Raise the current log level to "min_log_level" if the current value is
 * numerically lower. A log level which is already equal to or higher than
 * "min_log_level" is left unchanged, i.e. this function never lowers it.
 */
void
|
||||
logger_set_min_level(int min_log_level)
|
||||
{
|
||||
if (min_log_level > log_level)
|
||||
log_level = min_log_level;
|
||||
}
|
||||
|
||||
int
|
||||
detect_log_level(const char *level)
|
||||
{
|
||||
|
||||
1
log.h
1
log.h
@@ -128,6 +128,7 @@ bool logger_shutdown(void);
|
||||
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
void logger_set_min_level(int min_log_level);
|
||||
|
||||
void
|
||||
log_detail(const char *fmt,...)
|
||||
|
||||
@@ -569,6 +569,8 @@ do_cluster_crosscheck(void)
|
||||
|
||||
t_node_status_cube **cube;
|
||||
|
||||
bool error_found = false;
|
||||
|
||||
n = build_cluster_crosscheck(&cube, &name_length);
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
@@ -648,9 +650,11 @@ do_cluster_crosscheck(void)
|
||||
{
|
||||
case -2:
|
||||
c = '?';
|
||||
error_found = true;
|
||||
break;
|
||||
case -1:
|
||||
c = 'x';
|
||||
error_found = true;
|
||||
break;
|
||||
case 0:
|
||||
c = '*';
|
||||
@@ -689,6 +693,11 @@ do_cluster_crosscheck(void)
|
||||
|
||||
free(cube);
|
||||
}
|
||||
|
||||
if (error_found == true)
|
||||
{
|
||||
exit(ERR_CLUSTER_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -704,6 +713,8 @@ do_cluster_matrix()
|
||||
|
||||
t_node_matrix_rec **matrix_rec_list;
|
||||
|
||||
bool error_found = false;
|
||||
|
||||
n = build_cluster_matrix(&matrix_rec_list, &name_length);
|
||||
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
@@ -742,9 +753,11 @@ do_cluster_matrix()
|
||||
{
|
||||
case -2:
|
||||
c = '?';
|
||||
error_found = true;
|
||||
break;
|
||||
case -1:
|
||||
c = 'x';
|
||||
error_found = true;
|
||||
break;
|
||||
case 0:
|
||||
c = '*';
|
||||
@@ -770,6 +783,11 @@ do_cluster_matrix()
|
||||
}
|
||||
|
||||
free(matrix_rec_list);
|
||||
|
||||
if (error_found == true)
|
||||
{
|
||||
exit(ERR_CLUSTER_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1672,6 +1672,13 @@ parse_server_action(const char *action_name)
|
||||
*
|
||||
* Note that "repmgr node rejoin" is also executed by
|
||||
* "repmgr standby switchover" after promoting the new primary.
|
||||
*
|
||||
* Parameters:
|
||||
* --dry-run
|
||||
* --force-rewind[=VALUE]
|
||||
* --config-files
|
||||
* --config-archive-dir
|
||||
* -W/--no-wait
|
||||
*/
|
||||
void
|
||||
do_node_rejoin(void)
|
||||
@@ -1789,30 +1796,33 @@ do_node_rejoin(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* If --force-rewind specified, check pg_rewind can be used, and
|
||||
* pre-emptively fetch the list of configuration files which should be
|
||||
* archived
|
||||
* --force-rewind specified - check prerequisites, and attempt to execute
|
||||
* (if --dry-run provided, just output the command which would be executed)
|
||||
*/
|
||||
|
||||
|
||||
if (runtime_options.force_rewind_used == true)
|
||||
{
|
||||
PQExpBufferData reason;
|
||||
PQExpBufferData msg;
|
||||
PQExpBufferData filebuf;
|
||||
int ret;
|
||||
|
||||
initPQExpBuffer(&reason);
|
||||
/*
|
||||
* Check that pg_rewind can be used
|
||||
*/
|
||||
|
||||
if (can_use_pg_rewind(upstream_conn, config_file_options.data_directory, &reason) == false)
|
||||
initPQExpBuffer(&msg);
|
||||
|
||||
if (can_use_pg_rewind(upstream_conn, config_file_options.data_directory, &msg) == false)
|
||||
{
|
||||
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
||||
log_detail("%s", reason.data);
|
||||
termPQExpBuffer(&reason);
|
||||
log_detail("%s", msg.data);
|
||||
termPQExpBuffer(&msg);
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
termPQExpBuffer(&reason);
|
||||
|
||||
initPQExpBuffer(&msg);
|
||||
appendPQExpBuffer(&msg,
|
||||
_("prerequisites for using pg_rewind are met"));
|
||||
|
||||
@@ -1825,17 +1835,14 @@ do_node_rejoin(void)
|
||||
log_verbose(LOG_INFO, "%s", msg.data);
|
||||
}
|
||||
termPQExpBuffer(&msg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Forcibly rewind node if requested (this is mainly for use when this
|
||||
* action is being executed by "repmgr standby switchover")
|
||||
*/
|
||||
if (runtime_options.force_rewind_used == true && runtime_options.dry_run == false)
|
||||
{
|
||||
int ret;
|
||||
PQExpBufferData filebuf;
|
||||
|
||||
/*
|
||||
* Archive requested configuration files.
|
||||
*
|
||||
* In --dry-run mode this acts as a check that the files can be archived, though
|
||||
* errors will only be logged; any copied files will be deleted and --dry-run
|
||||
* execution will continue.
|
||||
*/
|
||||
_do_node_archive_config();
|
||||
|
||||
/* execute pg_rewind */
|
||||
@@ -1866,121 +1873,119 @@ do_node_rejoin(void)
|
||||
log_info(_("pg_rewind would now be executed"));
|
||||
log_detail(_("pg_rewind command is:\n %s"),
|
||||
command.data);
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
log_notice(_("executing pg_rewind"));
|
||||
log_debug("pg_rewind command is:\n %s",
|
||||
command.data);
|
||||
|
||||
initPQExpBuffer(&command_output);
|
||||
|
||||
ret = local_command(
|
||||
command.data,
|
||||
&command_output);
|
||||
|
||||
termPQExpBuffer(&command);
|
||||
|
||||
if (ret == false)
|
||||
else
|
||||
{
|
||||
log_error(_("unable to execute pg_rewind"));
|
||||
log_detail("%s", command_output.data);
|
||||
log_notice(_("executing pg_rewind"));
|
||||
log_debug("pg_rewind command is:\n %s",
|
||||
command.data);
|
||||
|
||||
initPQExpBuffer(&command_output);
|
||||
|
||||
ret = local_command(command.data,
|
||||
&command_output);
|
||||
|
||||
termPQExpBuffer(&command);
|
||||
|
||||
if (ret == false)
|
||||
{
|
||||
log_error(_("unable to execute pg_rewind"));
|
||||
log_detail("%s", command_output.data);
|
||||
|
||||
termPQExpBuffer(&command_output);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&command_output);
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
/* Restore any previously archived config files */
|
||||
_do_node_restore_config();
|
||||
|
||||
termPQExpBuffer(&command_output);
|
||||
initPQExpBuffer(&filebuf);
|
||||
|
||||
/* Restore any previously archived config files */
|
||||
_do_node_restore_config();
|
||||
|
||||
initPQExpBuffer(&filebuf);
|
||||
|
||||
/* remove any recovery.done file copied in by pg_rewind */
|
||||
appendPQExpBuffer(&filebuf,
|
||||
"%s/recovery.done",
|
||||
config_file_options.data_directory);
|
||||
|
||||
if (stat(filebuf.data, &statbuf) == 0)
|
||||
{
|
||||
log_verbose(LOG_INFO, _("deleting \"recovery.done\""));
|
||||
|
||||
if (unlink(filebuf.data) == -1)
|
||||
{
|
||||
log_warning(_("unable to delete \"%s\""),
|
||||
filebuf.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
}
|
||||
termPQExpBuffer(&filebuf);
|
||||
|
||||
/*
|
||||
* Delete any replication slots copied in by pg_rewind.
|
||||
*
|
||||
* TODO:
|
||||
* - from PostgreSQL 11, this will be handled by pg_rewind, so
|
||||
* we can skip this step from that version; see commit
|
||||
* 266b6acb312fc440c1c1a2036aa9da94916beac6
|
||||
* - possibly delete contents various other directories
|
||||
* as per the above commit for pre-PostgreSQL 11
|
||||
*/
|
||||
{
|
||||
PQExpBufferData slotdir_path;
|
||||
DIR *slotdir;
|
||||
struct dirent *slotdir_ent;
|
||||
|
||||
initPQExpBuffer(&slotdir_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_path,
|
||||
"%s/pg_replslot",
|
||||
/* remove any recovery.done file copied in by pg_rewind */
|
||||
appendPQExpBuffer(&filebuf,
|
||||
"%s/recovery.done",
|
||||
config_file_options.data_directory);
|
||||
|
||||
slotdir = opendir(slotdir_path.data);
|
||||
|
||||
if (slotdir == NULL)
|
||||
if (stat(filebuf.data, &statbuf) == 0)
|
||||
{
|
||||
log_warning(_("unable to open replication slot directory \"%s\""),
|
||||
slotdir_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((slotdir_ent = readdir(slotdir)) != NULL) {
|
||||
struct stat statbuf;
|
||||
PQExpBufferData slotdir_ent_path;
|
||||
log_verbose(LOG_INFO, _("deleting \"recovery.done\""));
|
||||
|
||||
if(strcmp(slotdir_ent->d_name, ".") == 0 || strcmp(slotdir_ent->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
initPQExpBuffer(&slotdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_ent_path,
|
||||
"%s/%s",
|
||||
slotdir_path.data,
|
||||
slotdir_ent->d_name);
|
||||
|
||||
if (stat(slotdir_ent_path.data, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_debug("deleting slot directory \"%s\"", slotdir_ent_path.data);
|
||||
if (rmdir_recursive(slotdir_ent_path.data) != 0 && errno != EEXIST)
|
||||
{
|
||||
log_warning(_("unable to delete replication slot directory \"%s\""), slotdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_hint(_("directory may need to be manually removed"));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
if (unlink(filebuf.data) == -1)
|
||||
{
|
||||
log_warning(_("unable to delete \"%s\""),
|
||||
filebuf.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
}
|
||||
termPQExpBuffer(&slotdir_path);
|
||||
termPQExpBuffer(&filebuf);
|
||||
|
||||
/*
|
||||
* Delete any replication slots copied in by pg_rewind.
|
||||
*
|
||||
* TODO:
|
||||
* - from PostgreSQL 11, this will be handled by pg_rewind, so
|
||||
* we can skip this step from that version; see commit
|
||||
* 266b6acb312fc440c1c1a2036aa9da94916beac6
|
||||
* - possibly delete contents of various other directories
|
||||
* as per the above commit for pre-PostgreSQL 11
|
||||
*/
|
||||
{
|
||||
PQExpBufferData slotdir_path;
|
||||
DIR *slotdir;
|
||||
struct dirent *slotdir_ent;
|
||||
|
||||
initPQExpBuffer(&slotdir_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_path,
|
||||
"%s/pg_replslot",
|
||||
config_file_options.data_directory);
|
||||
|
||||
slotdir = opendir(slotdir_path.data);
|
||||
|
||||
if (slotdir == NULL)
|
||||
{
|
||||
log_warning(_("unable to open replication slot directory \"%s\""),
|
||||
slotdir_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
else
|
||||
{
|
||||
while ((slotdir_ent = readdir(slotdir)) != NULL) {
|
||||
struct stat statbuf;
|
||||
PQExpBufferData slotdir_ent_path;
|
||||
|
||||
if(strcmp(slotdir_ent->d_name, ".") == 0 || strcmp(slotdir_ent->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
initPQExpBuffer(&slotdir_ent_path);
|
||||
|
||||
appendPQExpBuffer(&slotdir_ent_path,
|
||||
"%s/%s",
|
||||
slotdir_path.data,
|
||||
slotdir_ent->d_name);
|
||||
|
||||
if (stat(slotdir_ent_path.data, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
continue;
|
||||
}
|
||||
|
||||
log_debug("deleting slot directory \"%s\"", slotdir_ent_path.data);
|
||||
if (rmdir_recursive(slotdir_ent_path.data) != 0 && errno != EEXIST)
|
||||
{
|
||||
log_warning(_("unable to delete replication slot directory \"%s\""), slotdir_ent_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
log_hint(_("directory may need to be manually removed"));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&slotdir_ent_path);
|
||||
}
|
||||
}
|
||||
termPQExpBuffer(&slotdir_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2000,7 +2005,9 @@ do_node_rejoin(void)
|
||||
if (success == false)
|
||||
{
|
||||
log_notice(_("NODE REJOIN failed"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
if (strlen(follow_output.data))
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
create_event_notification(upstream_conn,
|
||||
&config_file_options,
|
||||
@@ -2160,6 +2167,11 @@ _do_node_archive_config(void)
|
||||
termPQExpBuffer(&archive_dir);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_verbose(LOG_INFO, "temporary archive directory \"%s\" created", archive_dir.data);
|
||||
}
|
||||
}
|
||||
else if (!S_ISDIR(statbuf.st_mode))
|
||||
{
|
||||
@@ -2184,8 +2196,8 @@ _do_node_archive_config(void)
|
||||
{
|
||||
|
||||
/*
|
||||
* attempt to remove any existing files in the directory TODO: collate
|
||||
* problem files into list
|
||||
* attempt to remove any existing files in the directory
|
||||
* TODO: collate problem files into list
|
||||
*/
|
||||
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
||||
{
|
||||
@@ -2261,7 +2273,11 @@ _do_node_archive_config(void)
|
||||
|
||||
if (i < config_file_len)
|
||||
{
|
||||
strncpy(filenamebuf, runtime_options.config_files + i, config_file_len - i);
|
||||
int filename_len = config_file_len - i;
|
||||
|
||||
strncpy(filenamebuf, runtime_options.config_files + i, filename_len);
|
||||
|
||||
filenamebuf[filename_len] = '\0';
|
||||
|
||||
initPQExpBuffer(&pathbuf);
|
||||
appendPQExpBuffer(&pathbuf,
|
||||
@@ -2339,7 +2355,7 @@ _do_node_archive_config(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir.data);
|
||||
log_verbose(LOG_INFO, "temporary archive directory \"%s\" deleted", archive_dir.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -87,7 +87,7 @@ static void initialise_direct_clone(t_node_info *node_record);
|
||||
static int run_basebackup(t_node_info *node_record);
|
||||
static int run_file_backup(t_node_info *node_record);
|
||||
|
||||
static void copy_configuration_files(void);
|
||||
static void copy_configuration_files(bool delete_after_copy);
|
||||
|
||||
static void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||
|
||||
@@ -498,7 +498,33 @@ do_standby_clone(void)
|
||||
|
||||
termPQExpBuffer(&msg);
|
||||
|
||||
/* TODO: check all files are readable */
|
||||
/*
|
||||
* Here we'll attempt an initial test copy of the detected external
|
||||
* files, to detect any issues before we run the base backup.
|
||||
*
|
||||
* Note this will exit with an error, unless -F/--force supplied.
|
||||
*
|
||||
* TODO: put the files in a temporary directory and move to their final
|
||||
* destination once the database has been cloned.
|
||||
*/
|
||||
|
||||
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
|
||||
{
|
||||
/*
|
||||
* Files will be placed in the same path as on the source server;
|
||||
* don't delete after copying.
|
||||
*/
|
||||
copy_configuration_files(false);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Files will be placed in the data directory - delete after copying.
|
||||
* They'll be copied again later; see TODO above.
|
||||
*/
|
||||
copy_configuration_files(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -597,7 +623,12 @@ do_standby_clone(void)
|
||||
*/
|
||||
if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
|
||||
{
|
||||
copy_configuration_files();
|
||||
/*
|
||||
* If "--copy-external-config-files=samepath" was used, the files will already
|
||||
* have been copied.
|
||||
*/
|
||||
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
|
||||
copy_configuration_files(false);
|
||||
}
|
||||
|
||||
/* Write the recovery.conf file */
|
||||
@@ -938,7 +969,6 @@ _do_create_recovery_conf(void)
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -955,7 +985,10 @@ _do_create_recovery_conf(void)
|
||||
{
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
log_hint(_("standby must be registered before a new recovery.conf file can be created"));
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -2126,7 +2159,13 @@ do_standby_follow(void)
|
||||
|
||||
log_verbose(LOG_DEBUG, "do_standby_follow()");
|
||||
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_hint(_("use \"repmgr node rejoin\" to re-add an inactive node to the replication cluster"));
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
log_verbose(LOG_INFO, _("connected to local node"));
|
||||
|
||||
@@ -2313,6 +2352,74 @@ do_standby_follow(void)
|
||||
&follow_output,
|
||||
&follow_error_code);
|
||||
|
||||
/* unable to restart the standby */
|
||||
if (success == false)
|
||||
{
|
||||
create_event_notification_extended(
|
||||
primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"standby_follow",
|
||||
success,
|
||||
follow_output.data,
|
||||
&event_info);
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
log_notice(_("STANDBY FOLLOW failed"));
|
||||
if (strlen( follow_output.data ))
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(follow_error_code);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
|
||||
initPQExpBuffer(&follow_output);
|
||||
|
||||
/*
|
||||
* Wait up to "standby_follow_timeout" seconds for standby to connect to
|
||||
* upstream.
|
||||
* For 9.6 and later, we could check pg_stat_wal_receiver on the local node.
|
||||
*/
|
||||
|
||||
/* assume success, necessary if standby_follow_timeout is zero */
|
||||
success = true;
|
||||
|
||||
for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
|
||||
{
|
||||
success = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
||||
if (success == true)
|
||||
break;
|
||||
|
||||
log_verbose(LOG_DEBUG, "sleeping %i of max %i seconds waiting for standby to attach to primary",
|
||||
timer + 1,
|
||||
config_file_options.standby_follow_timeout);
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
if (success == true)
|
||||
{
|
||||
log_notice(_("STANDBY FOLLOW successful"));
|
||||
appendPQExpBuffer(&follow_output,
|
||||
"standby attached to upstream node \"%s\" (node ID: %i)",
|
||||
primary_node_record.node_name,
|
||||
primary_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("STANDBY FOLLOW failed"));
|
||||
appendPQExpBuffer(&follow_output,
|
||||
"standby did not attach to upstream node \"%s\" (node ID: %i) after %i seconds",
|
||||
primary_node_record.node_name,
|
||||
primary_node_id,
|
||||
config_file_options.standby_follow_timeout);
|
||||
|
||||
}
|
||||
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
create_event_notification_extended(
|
||||
primary_conn,
|
||||
&config_file_options,
|
||||
@@ -2324,20 +2431,11 @@ do_standby_follow(void)
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_notice(_("STANDBY FOLLOW failed"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(follow_error_code);
|
||||
}
|
||||
|
||||
log_notice(_("STANDBY FOLLOW successful"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
|
||||
if (success == false)
|
||||
exit(ERR_FOLLOW_FAIL);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3909,6 +4007,8 @@ check_source_server()
|
||||
PGconn *privileged_conn = NULL;
|
||||
|
||||
char cluster_size[MAXLEN];
|
||||
char *connstr = NULL;
|
||||
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||
@@ -3917,8 +4017,11 @@ check_source_server()
|
||||
log_verbose(LOG_DEBUG, "check_source_server()");
|
||||
log_info(_("connecting to source node"));
|
||||
|
||||
source_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
connstr = param_list_to_string(&source_conninfo);
|
||||
log_detail(_("connection string is: %s"), connstr);
|
||||
pfree(connstr);
|
||||
|
||||
source_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
/*
|
||||
* Unless in barman mode, exit with an error;
|
||||
* establish_db_connection_by_params() will have already logged an error
|
||||
@@ -4073,13 +4176,25 @@ check_source_server()
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
t_conninfo_param_list upstream_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
char *upstream_conninfo_user;
|
||||
|
||||
initialize_conninfo_params(&upstream_conninfo, false);
|
||||
parse_conninfo_string(node_record.conninfo, &upstream_conninfo, NULL, false);
|
||||
|
||||
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
|
||||
strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
|
||||
strncpy(upstream_user, param_get(&upstream_conninfo, "user"), NAMEDATALEN);
|
||||
|
||||
upstream_conninfo_user = param_get(&upstream_conninfo, "user");
|
||||
if (upstream_conninfo_user != NULL)
|
||||
{
|
||||
strncpy(upstream_user, upstream_conninfo_user, NAMEDATALEN);
|
||||
}
|
||||
else
|
||||
{
|
||||
get_conninfo_default_value("user", upstream_user, NAMEDATALEN);
|
||||
}
|
||||
|
||||
log_verbose(LOG_DEBUG, "upstream_user is \"%s\"", upstream_user);
|
||||
|
||||
upstream_conninfo_found = true;
|
||||
}
|
||||
@@ -4632,7 +4747,7 @@ initialise_direct_clone(t_node_info *node_record)
|
||||
}
|
||||
else
|
||||
{
|
||||
TablespaceListCell *cell = false;
|
||||
TablespaceListCell *cell;
|
||||
KeyValueList not_found = {NULL, NULL};
|
||||
int total = 0,
|
||||
matched = 0;
|
||||
@@ -5690,7 +5805,7 @@ get_barman_property(char *dst, char *name, char *local_repmgr_directory)
|
||||
|
||||
|
||||
static void
|
||||
copy_configuration_files(void)
|
||||
copy_configuration_files(bool delete_after_copy)
|
||||
{
|
||||
int i,
|
||||
r;
|
||||
@@ -5735,13 +5850,35 @@ copy_configuration_files(void)
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
file->filepath, dest_path.data, false, source_server_version_num);
|
||||
|
||||
termPQExpBuffer(&dest_path);
|
||||
/*
|
||||
* TODO: collate errors into list
|
||||
*/
|
||||
|
||||
if (WEXITSTATUS(r))
|
||||
{
|
||||
log_error(_("standby clone: unable to copy config file \"%s\""),
|
||||
file->filename);
|
||||
log_hint(_("see preceding messages for details"));
|
||||
|
||||
if (runtime_options.force == false)
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is to check we can actually copy the files before running the
|
||||
* main clone operation
|
||||
*/
|
||||
if (delete_after_copy == true)
|
||||
{
|
||||
/* this is very unlikely to happen, but log in case it does */
|
||||
if (unlink(dest_path.data) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_warning(_("unable to delete %s"), dest_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&dest_path);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
@@ -137,7 +137,7 @@ do_witness_register(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO:sanity check witness node is not part of main cluster; we could
|
||||
* TODO: sanity check witness node is not part of main cluster; we could
|
||||
* add a random application_name to the respective connections,
|
||||
* and do a simple check of pg_stat_activity
|
||||
*/
|
||||
@@ -193,8 +193,26 @@ do_witness_register(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that an active node with the same node_name doesn't exist already
|
||||
*/
|
||||
|
||||
// XXX check other node with same name does not exist
|
||||
record_status = get_node_record_by_name(primary_conn,
|
||||
config_file_options.node_name,
|
||||
&node_record);
|
||||
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
if (node_record.active == true && node_record.node_id != config_file_options.node_id)
|
||||
{
|
||||
log_error(_("node %i exists already with node_name \"%s\""),
|
||||
node_record.node_id,
|
||||
config_file_options.node_name);
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if repmgr.nodes contains entries, delete if -F/--force provided,
|
||||
@@ -225,6 +243,7 @@ do_witness_register(void)
|
||||
PQfinish(witness_conn);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
/* create record on primary */
|
||||
|
||||
/*
|
||||
|
||||
@@ -634,7 +634,7 @@ main(int argc, char **argv)
|
||||
* If -d/--dbname appears to be a conninfo string, validate by attempting
|
||||
* to parse it (and if successful, store the parsed parameters)
|
||||
*/
|
||||
if (runtime_options.dbname)
|
||||
if (runtime_options.dbname[0])
|
||||
{
|
||||
if (strncmp(runtime_options.dbname, "postgresql://", 13) == 0 ||
|
||||
strncmp(runtime_options.dbname, "postgres://", 11) == 0 ||
|
||||
@@ -1010,7 +1010,6 @@ main(int argc, char **argv)
|
||||
runtime_options.output_mode = OM_OPTFORMAT;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check for configuration file items which can be overridden by runtime
|
||||
* options
|
||||
@@ -1068,6 +1067,17 @@ main(int argc, char **argv)
|
||||
if (runtime_options.terse)
|
||||
logger_set_terse();
|
||||
|
||||
/*
|
||||
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
||||
* of what's in the configuration file or -L/--log-level parameter, otherwise
|
||||
* some output might not be displayed.
|
||||
*/
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
logger_set_min_level(LOG_INFO);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Node configuration information is not needed for all actions, with
|
||||
* STANDBY CLONE being the main exception.
|
||||
|
||||
@@ -98,7 +98,7 @@
|
||||
#log_facility=STDERR # Logging facility: possible values are STDERR, or for
|
||||
# syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
|
||||
#log_file='' # stderr can be redirected to an arbitrary file:
|
||||
#log_file='' # stderr can be redirected to an arbitrary file
|
||||
#log_status_interval=300 # interval (in seconds) for repmgrd to log a status message
|
||||
|
||||
|
||||
@@ -213,8 +213,10 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# These settings apply when instructing a standby to follow the new primary
|
||||
# ("repmgr standby follow").
|
||||
|
||||
#primary_follow_timeout=60 # The length of time (in seconds) to wait
|
||||
#primary_follow_timeout=60 # The max length of time (in seconds) to wait
|
||||
# for the new primary to become available
|
||||
#standby_follow_timeout=30 # The max length of time (in seconds) to wait
|
||||
# for the standby to connect to the primary
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
1
repmgr.h
1
repmgr.h
@@ -70,6 +70,7 @@
|
||||
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_STANDBY_FOLLOW_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
||||
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.0.5"
|
||||
#define REPMGR_VERSION "4.0.6"
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ static FailoverState failover_state = FAILOVER_STATE_UNKNOWN;
|
||||
|
||||
static int primary_node_id = UNKNOWN_NODE_ID;
|
||||
static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
||||
static NodeInfoList standby_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
static NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
|
||||
static ElectionResult do_election(void);
|
||||
@@ -816,6 +816,29 @@ monitor_streaming_standby(void)
|
||||
{
|
||||
int degraded_monitoring_elapsed = calculate_elapsed(degraded_monitoring_start);
|
||||
|
||||
if (config_file_options.degraded_monitoring_timeout > 0
|
||||
&& degraded_monitoring_elapsed > config_file_options.degraded_monitoring_timeout)
|
||||
{
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("degraded monitoring timeout (%i seconds) exceeded, terminating"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_shutdown",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
terminate(ERR_MONITORING_TIMEOUT);
|
||||
}
|
||||
|
||||
|
||||
log_debug("monitoring node %i in degraded state for %i seconds",
|
||||
upstream_node_info.node_id,
|
||||
degraded_monitoring_elapsed);
|
||||
@@ -918,8 +941,8 @@ monitor_streaming_standby(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
former_upstream_node_id,
|
||||
&standby_nodes);
|
||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
||||
&sibling_nodes);
|
||||
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||
|
||||
/* this will restart monitoring in primary mode */
|
||||
monitoring_state = MS_NORMAL;
|
||||
@@ -958,12 +981,12 @@ monitor_streaming_standby(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
local_node_info.upstream_node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* skip local node check, we did that above */
|
||||
if (cell->node_info->node_id == local_node_info.node_id)
|
||||
@@ -993,7 +1016,7 @@ monitor_streaming_standby(void)
|
||||
follow_new_primary(follow_node_id);
|
||||
}
|
||||
}
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1395,12 +1418,12 @@ monitor_streaming_witness(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
local_node_info.upstream_node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* skip local node check, we did that above */
|
||||
if (cell->node_info->node_id == local_node_info.node_id)
|
||||
@@ -1430,7 +1453,7 @@ monitor_streaming_witness(void)
|
||||
witness_follow_new_primary(follow_node_id);
|
||||
}
|
||||
}
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
}
|
||||
}
|
||||
loop:
|
||||
@@ -1531,7 +1554,7 @@ do_primary_failover(void)
|
||||
}
|
||||
else if (election_result == ELECTION_WON)
|
||||
{
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_notice("this node is the winner, will now promote itself and inform other nodes");
|
||||
}
|
||||
@@ -1576,7 +1599,7 @@ do_primary_failover(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
upstream_node_info.node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
}
|
||||
else if (config_file_options.failover == FAILOVER_MANUAL)
|
||||
@@ -1638,10 +1661,10 @@ do_primary_failover(void)
|
||||
{
|
||||
case FAILOVER_STATE_PROMOTED:
|
||||
/* notify former siblings that they should now follow this node */
|
||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
||||
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||
|
||||
/* we no longer care about our former siblings */
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
|
||||
/* pass control back down to start_monitoring() */
|
||||
log_info(_("switching to primary monitoring mode"));
|
||||
@@ -1655,10 +1678,10 @@ do_primary_failover(void)
|
||||
* notify siblings that they should resume following the original
|
||||
* primary
|
||||
*/
|
||||
notify_followers(&standby_nodes, upstream_node_info.node_id);
|
||||
notify_followers(&sibling_nodes, upstream_node_info.node_id);
|
||||
|
||||
/* we no longer care about our former siblings */
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
|
||||
/* pass control back down to start_monitoring() */
|
||||
log_info(_("resuming standby monitoring mode"));
|
||||
@@ -2543,6 +2566,7 @@ do_election(void)
|
||||
|
||||
/* we're visible */
|
||||
int visible_nodes = 1;
|
||||
int total_nodes = 0;
|
||||
|
||||
NodeInfoListCell *cell = NULL;
|
||||
|
||||
@@ -2593,14 +2617,16 @@ do_election(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
upstream_node_info.node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
total_nodes = sibling_nodes.node_count + 1;
|
||||
|
||||
log_debug("do_election(): primary location is %s", upstream_node_info.location);
|
||||
|
||||
local_node_info.last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||
|
||||
/* fast path if no other standbys (or witness) exists - normally win by default */
|
||||
if (standby_nodes.node_count == 0)
|
||||
if (sibling_nodes.node_count == 0)
|
||||
{
|
||||
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
||||
{
|
||||
@@ -2628,7 +2654,7 @@ do_election(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* standby nodes found - check if we're in the primary location befor checking theirs */
|
||||
/* standby nodes found - check if we're in the primary location before checking theirs */
|
||||
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
||||
{
|
||||
primary_location_seen = true;
|
||||
@@ -2643,7 +2669,7 @@ do_election(void)
|
||||
/* pointer to "winning" node, initially self */
|
||||
candidate_node = &local_node_info;
|
||||
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* assume the worst case */
|
||||
cell->node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||
@@ -2698,7 +2724,7 @@ do_election(void)
|
||||
candidate_node = cell->node_info;
|
||||
}
|
||||
/* LSN is same - tiebreak on priority, then node_id */
|
||||
else if(cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
||||
else if (cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i",
|
||||
cell->node_info->node_id,
|
||||
@@ -2750,9 +2776,9 @@ do_election(void)
|
||||
|
||||
log_debug("visible nodes: %i; total nodes: %i",
|
||||
visible_nodes,
|
||||
standby_nodes.node_count);
|
||||
total_nodes);
|
||||
|
||||
if (visible_nodes <= (standby_nodes.node_count / 2.0))
|
||||
if (visible_nodes <= (total_nodes / 2.0))
|
||||
{
|
||||
log_notice(_("unable to reach a qualified majority of nodes"));
|
||||
log_detail(_("node will enter degraded monitoring state waiting for reconnect"));
|
||||
|
||||
Reference in New Issue
Block a user