mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
45 Commits
v4.0.5
...
REL4_0_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ee1a6f9d0f | ||
|
|
49eb408873 | ||
|
|
fba3d29514 | ||
|
|
77200e5030 | ||
|
|
4589b8d439 | ||
|
|
048f7c3310 | ||
|
|
1e5f63792f | ||
|
|
d26989bd12 | ||
|
|
f999c810a7 | ||
|
|
81077d4bc2 | ||
|
|
a549941d4f | ||
|
|
2f6c159f9a | ||
|
|
2eca1a0311 | ||
|
|
f6377084ec | ||
|
|
d85c02b92b | ||
|
|
d9ba41fc35 | ||
|
|
afdaf9be66 | ||
|
|
8067924c3e | ||
|
|
e94a6eefde | ||
|
|
69d7b6f7eb | ||
|
|
8ec3b2a536 | ||
|
|
68a9745e7e | ||
|
|
20ce53e2d2 | ||
|
|
638a119c85 | ||
|
|
053863cdd0 | ||
|
|
009cc0480c | ||
|
|
63bdc19132 | ||
|
|
fbd389d0b3 | ||
|
|
4aef4ea11e | ||
|
|
0ffaff75df | ||
|
|
c54bb73fb2 | ||
|
|
28ea2e48de | ||
|
|
41274f5525 | ||
|
|
edceb32ccb | ||
|
|
3dba8336e9 | ||
|
|
97d0cee259 | ||
|
|
2dfe1d18e9 | ||
|
|
55bb93bd3f | ||
|
|
4c49954cd4 | ||
|
|
a880b6ce16 | ||
|
|
c51a2283dd | ||
|
|
717828e73e | ||
|
|
c7477d7a9c | ||
|
|
1db8d3904f | ||
|
|
362f478d55 |
19
HISTORY
19
HISTORY
@@ -1,3 +1,22 @@
|
||||
4.0.6 2018-06-14
|
||||
repmgr: (witness register) prevent registration of a witness server with the
|
||||
same name as an existing node (Ian)
|
||||
repmgr: (standby follow) check node has actually connected to new primary
|
||||
before reporting success; GitHub #444 (Ian)
|
||||
repmgr: (standby clone) improve handling of external configuration file copying,
|
||||
including consideration in --dry-run check; GitHub #443 (Ian)
|
||||
repmgr: (standby clone) don't require presence of "user" parameter in
|
||||
conninfo string; GitHub #437 (Ian)
|
||||
repmgr: (standby clone) improve documentation of --recovery-conf-only
|
||||
mode; GitHub #438 (Ian)
|
||||
repmgr: (node rejoin) fix bug when parsing --config-files parameter;
|
||||
GitHub #442 (Ian)
|
||||
repmgr: when using --dry-run, force log level to INFO to ensure output
|
||||
will always be displayed; GitHub #441 (Ian)
|
||||
repmgr: (cluster matrix/crosscheck) return non-zero exit code if node
|
||||
connection issues detected; GitHub #447 (Ian)
|
||||
repmgrd: ensure local node is counted as quorum member; GitHub #439 (Ian)
|
||||
|
||||
4.0.5 2018-05-02
|
||||
repmgr: poll demoted primary after restart as a standby during a
|
||||
switchover operation; GitHub #408 (Ian)
|
||||
|
||||
20
configfile.c
20
configfile.c
@@ -319,13 +319,20 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->use_primary_conninfo_password = false;
|
||||
memset(options->passfile, 0, sizeof(options->passfile));
|
||||
|
||||
/*-----------------------
|
||||
/*-------------------------
|
||||
* standby promote settings
|
||||
*------------------------
|
||||
*-------------------------
|
||||
*/
|
||||
options->promote_check_timeout = DEFAULT_PROMOTE_CHECK_TIMEOUT;
|
||||
options->promote_check_interval = DEFAULT_PROMOTE_CHECK_INTERVAL;
|
||||
|
||||
/*------------------------
|
||||
* standby follow settings
|
||||
*------------------------
|
||||
*/
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
options->standby_follow_timeout = DEFAULT_STANDBY_FOLLOW_TIMEOUT;
|
||||
|
||||
/*-----------------
|
||||
* repmgrd settings
|
||||
*-----------------
|
||||
@@ -345,7 +352,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->degraded_monitoring_timeout = -1;
|
||||
options->async_query_timeout = DEFAULT_ASYNC_QUERY_TIMEOUT;
|
||||
options->primary_notification_timeout = DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT;
|
||||
options->primary_follow_timeout = DEFAULT_PRIMARY_FOLLOW_TIMEOUT;
|
||||
options->standby_reconnect_timeout = DEFAULT_STANDBY_RECONNECT_TIMEOUT;
|
||||
|
||||
/*-------------
|
||||
@@ -527,6 +533,12 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
else if (strcmp(name, "promote_check_interval") == 0)
|
||||
options->promote_check_interval = repmgr_atoi(value, name, error_list, 1);
|
||||
|
||||
/* standby follow settings */
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_follow_timeout") == 0)
|
||||
options->standby_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
/* node check settings */
|
||||
else if (strcmp(name, "archive_ready_warning") == 0)
|
||||
options->archive_ready_warning = repmgr_atoi(value, name, error_list, 1);
|
||||
@@ -576,8 +588,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
||||
options->async_query_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_notification_timeout") == 0)
|
||||
options->primary_notification_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "primary_follow_timeout") == 0)
|
||||
options->primary_follow_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
else if (strcmp(name, "standby_reconnect_timeout") == 0)
|
||||
options->standby_reconnect_timeout = repmgr_atoi(value, name, error_list, 0);
|
||||
|
||||
|
||||
@@ -98,6 +98,10 @@ typedef struct
|
||||
int promote_check_timeout;
|
||||
int promote_check_interval;
|
||||
|
||||
/* standby follow settings */
|
||||
int primary_follow_timeout;
|
||||
int standby_follow_timeout;
|
||||
|
||||
/* node check settings */
|
||||
int archive_ready_warning;
|
||||
int archive_ready_critical;
|
||||
@@ -120,7 +124,6 @@ typedef struct
|
||||
int degraded_monitoring_timeout;
|
||||
int async_query_timeout;
|
||||
int primary_notification_timeout;
|
||||
int primary_follow_timeout;
|
||||
int standby_reconnect_timeout;
|
||||
|
||||
/* BDR settings */
|
||||
@@ -167,6 +170,9 @@ typedef struct
|
||||
false, "", "", { NULL, NULL }, "", false, "", false, "", \
|
||||
/* standby promote settings */ \
|
||||
DEFAULT_PROMOTE_CHECK_TIMEOUT, DEFAULT_PROMOTE_CHECK_INTERVAL, \
|
||||
/* standby follow settings */ \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
DEFAULT_STANDBY_FOLLOW_TIMEOUT, \
|
||||
/* node check settings */ \
|
||||
DEFAULT_ARCHIVE_READY_WARNING, DEFAULT_ARCHIVE_READY_CRITICAL, \
|
||||
DEFAULT_REPLICATION_LAG_WARNING, DEFAULT_REPLICATION_LAG_CRITICAL, \
|
||||
@@ -180,7 +186,6 @@ typedef struct
|
||||
false, -1, \
|
||||
DEFAULT_ASYNC_QUERY_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT, \
|
||||
DEFAULT_PRIMARY_FOLLOW_TIMEOUT, \
|
||||
DEFAULT_STANDBY_RECONNECT_TIMEOUT, \
|
||||
/* BDR settings */ \
|
||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
AC_INIT([repmgr], [4.0.5], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
AC_INIT([repmgr], [4.0.6], [pgsql-bugs@postgresql.org], [repmgr], [https://2ndquadrant.com/en/resources/repmgr/])
|
||||
|
||||
AC_COPYRIGHT([Copyright (c) 2010-2018, 2ndQuadrant Ltd.])
|
||||
|
||||
|
||||
87
dbutils.c
87
dbutils.c
@@ -23,6 +23,7 @@
|
||||
#include <sys/time.h>
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
#include "repmgr.h"
|
||||
#include "dbutils.h"
|
||||
@@ -370,6 +371,37 @@ get_conninfo_value(const char *conninfo, const char *keyword, char *output)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get a default conninfo value for the provided parameter, and copy
|
||||
* it to the 'output' buffer.
|
||||
*
|
||||
* Returns true on success, or false on failure (provided keyword not found).
|
||||
*
|
||||
*/
|
||||
bool
|
||||
get_conninfo_default_value(const char *param, char *output, int maxlen)
|
||||
{
|
||||
PQconninfoOption *defs = NULL;
|
||||
PQconninfoOption *def = NULL;
|
||||
bool found = false;
|
||||
|
||||
defs = PQconndefaults();
|
||||
|
||||
for (def = defs; def->keyword; def++)
|
||||
{
|
||||
if (strncmp(def->keyword, param, maxlen) == 0)
|
||||
{
|
||||
strncpy(output, def->val, maxlen);
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
||||
PQconninfoFree(defs);
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults)
|
||||
{
|
||||
@@ -1733,7 +1765,7 @@ _populate_node_record(PGresult *res, t_node_info *node_info, int row)
|
||||
strncpy(node_info->location, PQgetvalue(res, row, 7), MAXLEN);
|
||||
node_info->priority = atoi(PQgetvalue(res, row, 8));
|
||||
node_info->active = atobool(PQgetvalue(res, row, 9));
|
||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXLEN);
|
||||
strncpy(node_info->config_file, PQgetvalue(res, row, 10), MAXPGPATH);
|
||||
|
||||
/* This won't normally be set */
|
||||
strncpy(node_info->upstream_node_name, PQgetvalue(res, row, 11), MAXLEN);
|
||||
@@ -2146,8 +2178,9 @@ get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoL
|
||||
"LEFT JOIN pg_catalog.pg_replication_slots rs "
|
||||
" ON rs.slot_name = n.slot_name "
|
||||
" WHERE n.slot_name IS NOT NULL"
|
||||
" AND rs.slot_name IS NULL "
|
||||
" AND n.upstream_node_id = %i ",
|
||||
" AND rs.slot_name IS NULL "
|
||||
" AND n.upstream_node_id = %i "
|
||||
" AND n.type = 'standby'",
|
||||
this_node_id);
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_all_node_records_with_missing_slot():\n%s", query.data);
|
||||
@@ -2884,8 +2917,7 @@ get_datadir_configuration_files(PGconn *conn, KeyValueList *list)
|
||||
|
||||
for (i = 0; i < PQntuples(res); i++)
|
||||
{
|
||||
key_value_list_set(
|
||||
list,
|
||||
key_value_list_set(list,
|
||||
PQgetvalue(res, i, 1),
|
||||
PQgetvalue(res, i, 0));
|
||||
}
|
||||
@@ -3110,6 +3142,8 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
char event_timestamp[MAXLEN] = "";
|
||||
bool success = true;
|
||||
|
||||
log_verbose(LOG_DEBUG, "_create_event(): event is \"%s\" for node %i", event, node_id);
|
||||
|
||||
/*
|
||||
* Only attempt to write a record if a connection handle was provided.
|
||||
* Also check that the repmgr schema has been properly initialised - if
|
||||
@@ -3620,7 +3654,7 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
|
||||
|
||||
|
||||
int
|
||||
get_free_replication_slots(PGconn *conn)
|
||||
get_free_replication_slot_count(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
@@ -3657,6 +3691,47 @@ get_free_replication_slots(PGconn *conn)
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
get_inactive_replication_slots(PGconn *conn, KeyValueList *list)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
int i, inactive_slots = 0;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT slot_name, slot_type "
|
||||
" FROM pg_catalog.pg_replication_slots "
|
||||
" WHERE active IS FALSE "
|
||||
" ORDER BY slot_name ");
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to execute replication slot query"));
|
||||
log_detail("%s", PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return -1;
|
||||
}
|
||||
|
||||
inactive_slots = PQntuples(res);
|
||||
|
||||
for (i = 0; i < inactive_slots; i++)
|
||||
{
|
||||
key_value_list_set(list,
|
||||
PQgetvalue(res, i, 0),
|
||||
PQgetvalue(res, i, 1));
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
return inactive_slots;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* ==================== */
|
||||
/* tablespace functions */
|
||||
/* ==================== */
|
||||
|
||||
@@ -357,7 +357,7 @@ void close_connection(PGconn **conn);
|
||||
|
||||
/* conninfo manipulation functions */
|
||||
bool get_conninfo_value(const char *conninfo, const char *keyword, char *output);
|
||||
|
||||
bool get_conninfo_default_value(const char *param, char *output, int maxlen);
|
||||
void initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults);
|
||||
void free_conninfo_params(t_conninfo_param_list *param_list);
|
||||
void copy_conninfo_params(t_conninfo_param_list *dest_list, t_conninfo_param_list *source_list);
|
||||
@@ -369,6 +369,7 @@ bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *par
|
||||
char *param_list_to_string(t_conninfo_param_list *param_list);
|
||||
bool has_passfile(void);
|
||||
|
||||
|
||||
/* transaction functions */
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
@@ -454,7 +455,8 @@ void create_slot_name(char *slot_name, int node_id);
|
||||
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
|
||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
||||
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||
int get_free_replication_slots(PGconn *conn);
|
||||
int get_free_replication_slot_count(PGconn *conn);
|
||||
int get_inactive_replication_slots(PGconn *conn, KeyValueList *list);
|
||||
|
||||
/* tablespace functions */
|
||||
bool get_tablespace_name_by_location(PGconn *conn, const char *location, char *name);
|
||||
|
||||
@@ -41,18 +41,19 @@
|
||||
<title>CentOS repositories</title>
|
||||
|
||||
<para>
|
||||
&repmgr; packages are available from the 2ndQuadrant repository, and also the PostgreSQL
|
||||
community repository. The 2ndQuadrant repository is updated immediately after each
|
||||
&repmgr; packages are available from the public 2ndQuadrant repository, and also the
|
||||
PostgreSQL community repository. The 2ndQuadrant repository is updated immediately
|
||||
after each
|
||||
&repmgr; release.
|
||||
</para>
|
||||
|
||||
<table id="centos-2ndquadrant-repository">
|
||||
<title>2ndQuadrant repository</title>
|
||||
<title>2ndQuadrant public repository</title>
|
||||
<tgroup cols="2">
|
||||
<tbody>
|
||||
<row>
|
||||
<entry>Repository URL:</entry>
|
||||
<entry><ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink></entry>
|
||||
<entry><ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink></entry>
|
||||
</row>
|
||||
<row>
|
||||
<entry>Repository documentation:</entry>
|
||||
|
||||
@@ -15,6 +15,113 @@
|
||||
See also: <xref linkend="upgrading-repmgr">
|
||||
</para>
|
||||
|
||||
<sect1 id="release-4.0.6">
|
||||
<title>Release 4.0.6</title>
|
||||
<para><emphasis>June 14, 2018</emphasis></para>
|
||||
<para>
|
||||
&repmgr; 4.0.6 contains a number of bug fixes and usability enhancements.
|
||||
</para>
|
||||
<para>
|
||||
We recommend upgrading to this version as soon as possible.
|
||||
This release can be installed as a simple package upgrade from repmgr 4.0 ~ 4.0.5;
|
||||
<application>repmgrd</application> (if running) should be restarted. See <xref linkend="upgrading-repmgr">
|
||||
for more details.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-cluster-crosscheck">repmgr cluster crosscheck</link></command> and
|
||||
<command><link linkend="repmgr-cluster-matrix">repmgr cluster matrix</link></command>:
|
||||
return non-zero exit code if node connection issues detected (GitHub #447)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Improve handling of external configuration file copying, including consideration in
|
||||
<option>--dry-run</option> check
|
||||
(GitHub #443)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
When using <option>--dry-run</option>, force log level to <literal>INFO</literal>
|
||||
to ensure output will always be displayed
|
||||
(GitHub #441)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Improve documentation of <option>--recovery-conf-only</option> mode
|
||||
(GitHub #438)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
Don't require presence of <varname>user</varname> parameter in conninfo string
|
||||
(GitHub #437)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-witness-register">repmgr witness register</link></command>:
|
||||
prevent registration of a witness server with the same name as an existing node
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-follow">repmgr standby follow</link></command>:
|
||||
check node has actually connected to new primary before reporting success
|
||||
(GitHub #444)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
|
||||
Fix bug when parsing <option>--config-files</option> parameter
|
||||
(GitHub #442)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application>: ensure local node is counted as quorum member
|
||||
(GitHub #439)
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="release-4.0.5">
|
||||
<title>Release 4.0.5</title>
|
||||
<para><emphasis>Wed May 2, 2018</emphasis></para>
|
||||
@@ -24,6 +131,7 @@
|
||||
generation and (in <application>repmgrd</application>) handling of various
|
||||
corner-case situations, as well as a number of bug fixes.
|
||||
</para>
|
||||
|
||||
<sect2>
|
||||
<title>Usability enhancements</title>
|
||||
|
||||
|
||||
@@ -33,34 +33,5 @@
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgr-rpm-key" xreflabel="repmgr rpm key">
|
||||
<title>repmgr RPM signing key</title>
|
||||
<para>
|
||||
The signing key ID used for <application>repmgr</application> source code bundles is:
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr">
|
||||
<literal>0x702D883A</literal></ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To download the <application>repmgr</application> source key to your computer:
|
||||
<programlisting>
|
||||
curl -s http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr | gpg --import
|
||||
gpg --fingerprint 0x702D883A
|
||||
</programlisting>
|
||||
then verify that the fingerprint is the expected value:
|
||||
<programlisting>
|
||||
AE4E 390E A58E 0037 6148 3F29 888D 018B 702D 883A</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To check a repository RPM, use <application>rpmkeys</application> to load the
|
||||
packaging signing key into the RPM database then use <literal>rpm -K</literal>, e.g.:
|
||||
<programlisting>
|
||||
sudo rpmkeys --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr
|
||||
rpm -K postgresql-bdr94-2ndquadrant-redhat-1.0-2.noarch.rpm
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
</sect1>
|
||||
|
||||
</appendix>
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
|
||||
<note>
|
||||
<para>
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
|
||||
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
|
||||
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
|
||||
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
|
||||
</para>
|
||||
@@ -47,16 +47,24 @@
|
||||
service_restart_command
|
||||
service_reload_command</programlisting>
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
It's also possible to specify a <varname>service_promote_command</varname>;
|
||||
this overrides any value contained in the setting <varname>promote_command</varname>.
|
||||
It's also possible to specify a <varname>service_promote_command</varname>.
|
||||
This is intended for systems which provide a package-level promote command,
|
||||
such as Debian's <application>pg_ctlcluster</application>.
|
||||
such as Debian's <application>pg_ctlcluster</application>, to promote the
|
||||
PostgreSQL instance from standby to primary.
|
||||
</para>
|
||||
<para>
|
||||
If your packaging system does not provide such a command, it can be left empty,
|
||||
and &repmgr; will generate the appropriate <command>pg_ctl ... promote</command> command.
|
||||
</para>
|
||||
<para>
|
||||
Do not confuse this with <varname>promote_command</varname>, which is used
|
||||
by <application>repmgrd</application> to execute <xref linkend="repmgr-standby-promote">.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
|
||||
<para>
|
||||
To confirm which command &repmgr; will execute for each action, use
|
||||
<command>repmgr node service --list --action=...</command>, e.g.:
|
||||
|
||||
@@ -217,9 +217,6 @@
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_promote_error</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>repmgrd_failover_promote</literal></simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara><literal>bdr_failover</literal></simpara>
|
||||
</listitem>
|
||||
|
||||
@@ -5,26 +5,27 @@
|
||||
system.
|
||||
</para>
|
||||
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, Fedora and CentOS">
|
||||
<sect2 id="installation-packages-redhat" xreflabel="Installing from packages on RHEL, CentOS and Fedora">
|
||||
|
||||
<indexterm>
|
||||
<primary>installation</primary>
|
||||
<secondary>on Red Hat/CentOS/Fedora etc.</secondary>
|
||||
</indexterm>
|
||||
|
||||
<title>RedHat/Fedora/CentOS</title>
|
||||
<title>RedHat/CentOS/Fedora</title>
|
||||
<para>
|
||||
RPM packages for &repmgr; are available via Yum through
|
||||
&repmgr; RPM packages for RedHat/CentOS variants and Fedora are available from the
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||
<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>; see following
|
||||
section for details.
|
||||
</para>
|
||||
<para>
|
||||
RPM packages for &repmgr; are also available via Yum through
|
||||
the PostgreSQL Global Development Group RPM repository
|
||||
(<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
|
||||
Follow the instructions for your distribution (RedHat, CentOS,
|
||||
Fedora, etc.) and architecture as detailed there.
|
||||
</para>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> also provides its
|
||||
own RPM packages which are made available
|
||||
at the same time as each &repmgr; release, as it can take some days for
|
||||
them to become available via the main PGDG repository. See following section for details:
|
||||
Fedora, etc.) and architecture as detailed there. Note that it can take some days
|
||||
for new &repmgr; packages to become available via this repository.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
@@ -43,59 +44,68 @@
|
||||
|
||||
|
||||
<sect3 id="installation-packages-redhat-2ndq">
|
||||
<title>2ndQuadrant repmgr yum repository</title>
|
||||
<title>2ndQuadrant public RPM yum repository</title>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
<ulink url="https://2ndquadrant.com">2ndQuadrant</ulink> previously provided a dedicated
|
||||
&repmgr; repository at
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/">http://packages.2ndquadrant.com/repmgr/</ulink>.
|
||||
This repository will be deprecated in a future release as it is now replaced by
|
||||
the <ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink>
|
||||
documented below.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="http://repmgr.org/release-notes-3.1.3.html">repmgr 3.1.3</ulink>,
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a dedicated <literal>yum</literal>
|
||||
repository for &repmgr; releases. This repository complements the main
|
||||
<ulink url="https://yum.postgresql.org/repopackages.php">PGDG community repository</ulink>,
|
||||
but enables repmgr users to access the latest &repmgr; packages before they are
|
||||
available via the PGDG repository, which can take several days to be updated following
|
||||
a fresh &repmgr; release.
|
||||
</para>
|
||||
<ulink url="https://rpm.2ndquadrant.com/">public RPM repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;. We recommend using this for all future &repmgr; releases.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://rpm.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
Import the repository public key (optional but recommended):
|
||||
<programlisting>
|
||||
rpm --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
Locate the repository RPM for your PostgreSQL version from the list at:
|
||||
<ulink url="https://rpm.2ndquadrant.com/">https://rpm.2ndquadrant.com/</ulink>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repository RPM for your distribution (this enables the 2ndQuadrant
|
||||
repository as a source of repmgr packages):
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>Fedora:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-fedora-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<simpara>
|
||||
<emphasis>RHEL, CentOS etc:</emphasis>
|
||||
<ulink url="http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm">http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</ulink>
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
e.g.:
|
||||
<programlisting>
|
||||
$ yum install http://packages.2ndquadrant.com/repmgr/yum-repo-rpms/repmgr-rhel-1.0-1.noarch.rpm</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
Install the repository RPM for your distribution and PostgreSQL version
|
||||
(this enables the 2ndQuadrant repository as a source of &repmgr; packages).
|
||||
</para>
|
||||
<para>
|
||||
For example, for PostgreSQL 10 on CentOS, execute:
|
||||
<programlisting>
|
||||
sudo yum install https://rpm.2ndquadrant.com/site/content/2ndquadrant-repo-10-1-1.el7.noarch.rpm
|
||||
</programlisting>
|
||||
</para>
|
||||
<para>
|
||||
Verify that the repository is installed with:
|
||||
<programlisting>
|
||||
sudo yum repolist</programlisting>
|
||||
The output should contain two entries like this:
|
||||
<programlisting>
|
||||
2ndquadrant-repo-10/7/x86_64 2ndQuadrant packages for PG10 for rhel 7 - x86_64 1
|
||||
2ndquadrant-repo-10-debug/7/x86_64 2ndQuadrant packages for PG10 for rhel 7 - x86_64 - Debug 1</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the repmgr version appropriate for your PostgreSQL version (e.g. <literal>repmgr96</literal>), e.g.:
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
<programlisting>
|
||||
$ yum install repmgr96</programlisting>
|
||||
$ yum install repmgr10</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
@@ -105,13 +115,13 @@
|
||||
<emphasis>Compatibility with PGDG Repositories</emphasis>
|
||||
</para>
|
||||
<para>
|
||||
The 2ndQuadrant &repmgr; yum repository uses exactly the same package definitions as the
|
||||
main PGDG repository and is effectively a selective mirror for &repmgr; packages only.
|
||||
The 2ndQuadrant &repmgr; yum repository packages use the same definitions and file system layout as the
|
||||
main PGDG repository.
|
||||
</para>
|
||||
<para>
|
||||
Normally yum should prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
Normally <application>yum</application> will prioritize the repository with the most recent &repmgr; version.
|
||||
Once the PGDG repository has been updated, it doesn't matter which repository
|
||||
the packages are installed from.
|
||||
</para>
|
||||
<para>
|
||||
To ensure the 2ndQuadrant repository is always prioritised, install <literal>yum-plugin-priorities</literal>
|
||||
@@ -125,30 +135,23 @@
|
||||
To install a specific package version, execute <command>yum --showduplicates list</command>
|
||||
for the package in question:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum --showduplicates list repmgr96
|
||||
[root@localhost ~]# yum --showduplicates list repmgr10
|
||||
Loaded plugins: fastestmirror
|
||||
Loading mirror speeds from cached hostfile
|
||||
* base: ftp.iij.ad.jp
|
||||
* extras: ftp.iij.ad.jp
|
||||
* updates: ftp.iij.ad.jp
|
||||
Available Packages
|
||||
repmgr96.x86_64 3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.2.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.1-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 3.3.2-1.rhel6 pgdg96
|
||||
repmgr96.x86_64 4.0.0-1.el6 2ndquadrant-repmgr
|
||||
repmgr96.x86_64 4.0.0-1.rhel6 pgdg96</programlisting>
|
||||
repmgr10.x86_64 4.0.3-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.4-1.rhel7 pgdg10
|
||||
repmgr10.x86_64 4.0.5-1.el7 2ndquadrant-repo-10</programlisting>
|
||||
then append the appropriate version number to the package name with a hyphen, e.g.:
|
||||
<programlisting>
|
||||
[root@localhost ~]# yum install repmgr96-3.3.2-1.el6</programlisting>
|
||||
[root@localhost ~]# yum install repmgr10-4.0.3-1.rhel7</programlisting>
|
||||
</para>
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
|
||||
|
||||
<sect2 id="installation-packages-debian" xreflabel="Installing from packages on Debian or Ubuntu">
|
||||
|
||||
<indexterm>
|
||||
@@ -168,6 +171,79 @@
|
||||
see the appendix section <xref linkend="packages-debian-ubuntu">.
|
||||
</para>
|
||||
|
||||
<sect3 id="installation-packages-debian-ubuntu-2ndq">
|
||||
<title>2ndQuadrant public apt repository for Debian/Ubuntu</title>
|
||||
|
||||
<para>
|
||||
Beginning with <ulink url="https://repmgr.org/docs/4.0/release-4.0.5.html">repmgr 4.0.5</ulink>,
|
||||
<ulink url="https://2ndquadrant.com/">2ndQuadrant</ulink> provides a
|
||||
<ulink url="https://apt.2ndquadrant.com/">public apt repository</ulink> for 2ndQuadrant software,
|
||||
including &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
General instructions for using this repository can be found on its
|
||||
<ulink url="https://apt.2ndquadrant.com/">homepage</ulink>. Specific instructions
|
||||
for installing &repmgr; follow below.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<emphasis>Installation</emphasis>
|
||||
|
||||
<itemizedlist>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
If not already present, install the <application>apt-transport-https</application> package:
|
||||
<programlisting>
|
||||
sudo apt-get install apt-transport-https</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Create <filename>/etc/apt/sources.list.d/2ndquadrant.list</filename> as follows:
|
||||
<programlisting>
|
||||
sudo sh -c 'echo "deb https://apt.2ndquadrant.com/ $(lsb_release -cs)-2ndquadrant main" > /etc/apt/sources.list.d/2ndquadrant.list'</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the 2ndQuadrant <ulink url="https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc">repository key</ulink>:
|
||||
<programlisting>
|
||||
sudo apt-get install curl ca-certificates
|
||||
curl https://apt.2ndquadrant.com/site/keys/9904CD4BD6BAF0C3.asc | sudo apt-key add -</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Update the package list:
|
||||
<programlisting>
|
||||
sudo apt-get update</programlisting>
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Install the &repmgr; version appropriate for your PostgreSQL version (e.g. <literal>repmgr10</literal>):
|
||||
<programlisting>
|
||||
$ apt-get install postgresql-10-repmgr</programlisting>
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
For packages for PostgreSQL 9.6 and earlier, the package name includes
|
||||
a period between major and minor version numbers, e.g.
|
||||
<literal>postgresql-9.6-repmgr</literal>.
|
||||
</para>
|
||||
</note>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
|
||||
</sect3>
|
||||
</sect2>
|
||||
|
||||
</sect1>
|
||||
|
||||
@@ -80,7 +80,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
There are also tags for each &repmgr; release, e.g. <filename>REL4_0_STABLE</filename>.
|
||||
There are also tags for each &repmgr; release, e.g. <filename>4.0.5</filename>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -38,5 +38,34 @@
|
||||
and therefore determine the state of outbound connections from that node.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
Following exit codes can be emitted by <command>repmgr cluster crosscheck</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_CLUSTER_CHECK (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be reached.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -97,5 +97,35 @@
|
||||
useful result.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
Following exit codes can be emitted by <command>repmgr cluster matrix</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
The check completed successfully and all nodes are reachable.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_CLUSTER_CHECK (25)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
One or more nodes could not be reached.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
</refentry>
|
||||
|
||||
|
||||
@@ -79,9 +79,26 @@
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
<para>
|
||||
Individual checks can also be output in a Nagios-compatible format by additionally
|
||||
providing the option <literal>--nagios</literal>.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Output format</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--csv</literal>: generate output in CSV format (not available
|
||||
for individual checks)
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--nagios</literal>: generate output in a Nagios-compatible format
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
</refentry>
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
<title>Example</title>
|
||||
<para>
|
||||
<programlisting>
|
||||
$ repmgr -f /etc/repmgr.comf node status
|
||||
$ repmgr -f /etc/repmgr.conf node status
|
||||
Node "node1":
|
||||
PostgreSQL version: 10beta1
|
||||
Total data size: 30 MB
|
||||
@@ -38,6 +38,20 @@
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Output format</title>
|
||||
<para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
|
||||
<listitem>
|
||||
<simpara>
|
||||
<literal>--csv</literal>: generate output in CSV format
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
|
||||
@@ -124,7 +124,7 @@
|
||||
<para>
|
||||
We recommend using <ulink url="https://www.pgbarman.org/">Barman</ulink> to manage
|
||||
WAL file archiving. For more details on combining &repmgr; and <application>Barman</application>,
|
||||
in particular using <varname>restore_command</varname> to configure Barman as a backu source of
|
||||
in particular using <varname>restore_command</varname> to configure Barman as a backup source of
|
||||
WAL files, see <xref linkend="cloning-from-barman">.
|
||||
</para>
|
||||
</note>
|
||||
@@ -177,12 +177,13 @@
|
||||
<title>Using a standby cloned by another method</title>
|
||||
<para>
|
||||
&repmgr; supports standbys cloned by another method (e.g. using <application>barman</application>'s
|
||||
<command>barman recover</command> command).
|
||||
<command><ulink url="http://docs.pgbarman.org/release/2.4/#recover">barman recover</ulink></command> command).
|
||||
</para>
|
||||
<para>
|
||||
To integrate the standby as a &repmgr; node, ensure the <filename>repmgr.conf</filename>
|
||||
file is created for the node, then execute the command
|
||||
<command>repmgr standby clone --recovery-conf-only</command>.
|
||||
file is created for the node, and that it has been registered using
|
||||
<command><link linkend="repmgr-standby-register">repmgr standby register</link></command>.
|
||||
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
||||
This will create the <filename>recovery.conf</filename> file needed to attach
|
||||
the node to its upstream, and will also create a replication slot on the
|
||||
upstream node if required.
|
||||
|
||||
@@ -26,10 +26,18 @@
|
||||
running. It can only be used to attach an active standby to the current primary node
|
||||
(and not to another standby).
|
||||
</para>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, see
|
||||
<xref linkend="repmgr-node-rejoin">
|
||||
</para>
|
||||
<tip>
|
||||
<para>
|
||||
To re-add an inactive node to the replication cluster, use
|
||||
<xref linkend="repmgr-node-rejoin">.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
<command>repmgr standby follow</command> will wait up to
|
||||
<varname>standby_follow_timeout</varname> seconds (default: <literal>30</literal>)
|
||||
to verify the standby has actually connected to the new primary.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
@@ -92,7 +100,7 @@
|
||||
A <literal>standby_follow</literal> <link linkend="event-notifications">event notification</link> will be generated.
|
||||
</para>
|
||||
<para>
|
||||
If provided, &repmgr; will subsitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the primary
|
||||
being followed, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
check the promotion every <varname>promote_check_interval</varname> seconds (default: 1 second).
|
||||
Both values can be defined in <filename>repmgr.conf</filename>.
|
||||
</para>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
|
||||
@@ -173,7 +173,7 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
If provided, &repmgr; will subsitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
If provided, &repmgr; will substitute the placeholders <literal>%p</literal> with the node ID of the
|
||||
primary node, <literal>%c</literal> with its <literal>conninfo</literal> string, and
|
||||
<literal>%a</literal> with its node name.
|
||||
</para>
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
<refpurpose>promote a standby to primary and demote the existing primary to a standby</refpurpose>
|
||||
</refnamediv>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>Description</title>
|
||||
|
||||
@@ -39,6 +40,14 @@
|
||||
For more details on performing a switchover, including preparation and configuration,
|
||||
see section <xref linkend="performing-switchover">.
|
||||
</para>
|
||||
|
||||
<note>
|
||||
<para>
|
||||
<application>repmgrd</application> should not be active on any nodes while a switchover is being
|
||||
executed. This restriction may be lifted in a later version.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -171,10 +180,12 @@
|
||||
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
|
||||
possible without actually changing the status of either node.
|
||||
</para>
|
||||
<para>
|
||||
<application>repmgrd</application> should not be active on any nodes while a switchover is being
|
||||
executed. This restriction may be lifted in a later version.
|
||||
</para>
|
||||
<important>
|
||||
<para>
|
||||
<application>repmgrd</application> must be shut down on all nodes while a switchover is being
|
||||
executed. This restriction will be removed in a future &repmgr; version.
|
||||
</para>
|
||||
</important>
|
||||
<para>
|
||||
External database connections, e.g. from an application, should not be permitted while
|
||||
the switchover is taking place. In particular, active transactions on the primary
|
||||
@@ -199,7 +210,7 @@
|
||||
<refsect1>
|
||||
<title>Exit codes</title>
|
||||
<para>
|
||||
Following exit codes can be emitted by <literal>repmgr standby switchover</literal>:
|
||||
Following exit codes can be emitted by <command>repmgr standby switchover</command>:
|
||||
</para>
|
||||
<variablelist>
|
||||
|
||||
@@ -227,7 +238,7 @@
|
||||
<para>
|
||||
The switchover was executed but a problem was encountered.
|
||||
Typically this means the former primary could not be reattached
|
||||
as a standby.
|
||||
as a standby. Check preceding log messages for more information.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
@@ -25,7 +25,13 @@
|
||||
<para>
|
||||
This is the official documentation of &repmgr; &repmgrversion; for
|
||||
use with PostgreSQL 9.3 - PostgreSQL 10.
|
||||
It describes the functionality supported by the current version of &repmgr;.
|
||||
</para>
|
||||
<para>
|
||||
&repmgr; is being continually developed and we strongly recommend using the
|
||||
latest version. Please check the
|
||||
<ulink url="https://repmgr.org/">repmgr website</ulink> for details
|
||||
about the current &repmgr; version as well as the
|
||||
<ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -99,15 +99,16 @@
|
||||
replication cluster. The database must be the BDR-enabled database.
|
||||
</para>
|
||||
<para>
|
||||
If defined, the evenr <application>event_notifications</application> parameter
|
||||
will restrict execution of <varname>event_notification_command</varname>
|
||||
If defined, the <varname>event_notifications</varname> parameter will restrict
|
||||
execution of the script defined in <varname>event_notification_command</varname>
|
||||
to the specified event(s).
|
||||
</para>
|
||||
<note>
|
||||
<simpara>
|
||||
<varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
|
||||
of reconfiguring the proxy server/ connection pooler. It is fully
|
||||
user-definable; a reference implementation is documented below.
|
||||
user-definable; see section <xref linkend="bdr-event-notification-command"> for a reference
|
||||
implementation.
|
||||
</simpara>
|
||||
</note>
|
||||
|
||||
@@ -169,8 +170,8 @@
|
||||
</para>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="bdr-event-notification-command" xreflabel="BDR failover event notification command">
|
||||
<title>Defining the "event_notification_command"</title>
|
||||
<sect1 id="bdr-event-notification-command" xreflabel="Defining the BDR failover 'event_notification_command'">
|
||||
<title>Defining the BDR failover "event_notification_command"</title>
|
||||
<para>
|
||||
Key to "failover" execution is the <literal>event_notification_command</literal>,
|
||||
which is a user-definable script specified in <filename>repmgr.conf</filename>
|
||||
|
||||
@@ -34,6 +34,24 @@
|
||||
the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
To apply configuration file changes to a running <application>repmgrd</application>
|
||||
daemon, execute the operating system's <application>repmgrd</application> service reload command
|
||||
(see <xref linkend="appendix-packages"> for examples),
|
||||
or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
|
||||
<command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Check the <application>repmgrd</application> log to see what changes were
|
||||
applied, or if any issues were encountered when reloading the configuration.
|
||||
</para>
|
||||
</note>
|
||||
<para>
|
||||
Note that only a subset of configuration file parameters can be changed on a
|
||||
running <application>repmgrd</application> daemon.
|
||||
</para>
|
||||
|
||||
<sect2 id="repmgrd-automatic-failover-configuration">
|
||||
<title>automatic failover configuration</title>
|
||||
<para>
|
||||
@@ -162,13 +180,6 @@
|
||||
repmgrd -f /etc/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize</programlisting>
|
||||
and stopped with <command>kill `cat /tmp/repmgrd.pid`</command>. Adjust paths as appropriate.
|
||||
</para>
|
||||
<para>
|
||||
To apply configuration file changes to a running <application>repmgrd</application>
|
||||
daemon, execute the operating system's service reload command (for manually started
|
||||
instances, execute <command>kill -HUP `cat /tmp/repmgrd.pid`</command>).
|
||||
Note that only a subset of configuration file parameters can be changed on a
|
||||
running <application>repmgrd</application> daemon.
|
||||
</para>
|
||||
|
||||
<sect2 id="repmgrd-configuration-debian-ubuntu">
|
||||
<indexterm>
|
||||
|
||||
@@ -140,10 +140,12 @@
|
||||
manually with <command>repmgr node check --archive-ready</command>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
|
||||
promoting a node.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
|
||||
promoting a node. This restriction will be removed in a future &repmgr; version.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
<para>
|
||||
Finally, consider executing <command>repmgr standby switchover</command> with the
|
||||
|
||||
@@ -1 +1 @@
|
||||
<!ENTITY repmgrversion "4.0.5">
|
||||
<!ENTITY repmgrversion "4.0.6">
|
||||
|
||||
@@ -46,5 +46,6 @@
|
||||
#define ERR_SWITCHOVER_INCOMPLETE 22
|
||||
#define ERR_FOLLOW_FAIL 23
|
||||
#define ERR_REJOIN_FAIL 24
|
||||
#define ERR_CLUSTER_CHECK 25
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
7
log.c
7
log.c
@@ -329,6 +329,13 @@ logger_set_terse(void)
|
||||
}
|
||||
|
||||
|
||||
/*
 * logger_set_min_level()
 *
 * Raise log_level (declared outside this function) to min_log_level when
 * its current value is numerically lower; if log_level is already equal to
 * or greater than min_log_level it is left unchanged.
 *
 * NOTE(review): presumably the log levels follow syslog numbering, where a
 * larger value means more verbose output - confirm against log.h.
 */
void
|
||||
logger_set_min_level(int min_log_level)
|
||||
{
|
||||
if (min_log_level > log_level)
|
||||
log_level = min_log_level;
|
||||
}
|
||||
|
||||
int
|
||||
detect_log_level(const char *level)
|
||||
{
|
||||
|
||||
1
log.h
1
log.h
@@ -128,6 +128,7 @@ bool logger_shutdown(void);
|
||||
|
||||
void logger_set_verbose(void);
|
||||
void logger_set_terse(void);
|
||||
void logger_set_min_level(int min_log_level);
|
||||
|
||||
void
|
||||
log_detail(const char *fmt,...)
|
||||
|
||||
@@ -569,6 +569,8 @@ do_cluster_crosscheck(void)
|
||||
|
||||
t_node_status_cube **cube;
|
||||
|
||||
bool error_found = false;
|
||||
|
||||
n = build_cluster_crosscheck(&cube, &name_length);
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
{
|
||||
@@ -648,9 +650,11 @@ do_cluster_crosscheck(void)
|
||||
{
|
||||
case -2:
|
||||
c = '?';
|
||||
error_found = true;
|
||||
break;
|
||||
case -1:
|
||||
c = 'x';
|
||||
error_found = true;
|
||||
break;
|
||||
case 0:
|
||||
c = '*';
|
||||
@@ -689,6 +693,11 @@ do_cluster_crosscheck(void)
|
||||
|
||||
free(cube);
|
||||
}
|
||||
|
||||
if (error_found == true)
|
||||
{
|
||||
exit(ERR_CLUSTER_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -704,6 +713,8 @@ do_cluster_matrix()
|
||||
|
||||
t_node_matrix_rec **matrix_rec_list;
|
||||
|
||||
bool error_found = false;
|
||||
|
||||
n = build_cluster_matrix(&matrix_rec_list, &name_length);
|
||||
|
||||
if (runtime_options.output_mode == OM_CSV)
|
||||
@@ -742,9 +753,11 @@ do_cluster_matrix()
|
||||
{
|
||||
case -2:
|
||||
c = '?';
|
||||
error_found = true;
|
||||
break;
|
||||
case -1:
|
||||
c = 'x';
|
||||
error_found = true;
|
||||
break;
|
||||
case 0:
|
||||
c = '*';
|
||||
@@ -770,6 +783,11 @@ do_cluster_matrix()
|
||||
}
|
||||
|
||||
free(matrix_rec_list);
|
||||
|
||||
if (error_found == true)
|
||||
{
|
||||
exit(ERR_CLUSTER_CHECK);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
1129
repmgr-action-node.c
1129
repmgr-action-node.c
File diff suppressed because it is too large
Load Diff
@@ -87,7 +87,7 @@ static void initialise_direct_clone(t_node_info *node_record);
|
||||
static int run_basebackup(t_node_info *node_record);
|
||||
static int run_file_backup(t_node_info *node_record);
|
||||
|
||||
static void copy_configuration_files(void);
|
||||
static void copy_configuration_files(bool delete_after_copy);
|
||||
|
||||
static void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||
|
||||
@@ -498,7 +498,33 @@ do_standby_clone(void)
|
||||
|
||||
termPQExpBuffer(&msg);
|
||||
|
||||
/* TODO: check all files are readable */
|
||||
/*
|
||||
* Here we'll attempt an initial test copy of the detected external
|
||||
* files, to detect any issues before we run the base backup.
|
||||
*
|
||||
* Note this will exit with an error, unless -F/--force supplied.
|
||||
*
|
||||
* TODO: put the files in a temporary directory and move to their final
|
||||
* destination once the database has been cloned.
|
||||
*/
|
||||
|
||||
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_SAMEPATH)
|
||||
{
|
||||
/*
|
||||
* Files will be placed in the same path as on the source server;
|
||||
* don't delete after copying.
|
||||
*/
|
||||
copy_configuration_files(false);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Files will be placed in the data directory - delete after copying.
|
||||
* They'll be copied again later; see TODO above.
|
||||
*/
|
||||
copy_configuration_files(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -597,7 +623,12 @@ do_standby_clone(void)
|
||||
*/
|
||||
if (runtime_options.copy_external_config_files == true && config_files.entries > 0)
|
||||
{
|
||||
copy_configuration_files();
|
||||
/*
|
||||
* If "--copy-external-config-files=samepath" was used, the files will already
|
||||
* have been copied.
|
||||
*/
|
||||
if (runtime_options.copy_external_config_files_destination == CONFIG_FILE_PGDATA)
|
||||
copy_configuration_files(false);
|
||||
}
|
||||
|
||||
/* Write the recovery.conf file */
|
||||
@@ -938,7 +969,6 @@ _do_create_recovery_conf(void)
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
@@ -955,7 +985,10 @@ _do_create_recovery_conf(void)
|
||||
{
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
log_hint(_("standby must be registered before a new recovery.conf file can be created"));
|
||||
}
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
@@ -2126,7 +2159,13 @@ do_standby_follow(void)
|
||||
|
||||
log_verbose(LOG_DEBUG, "do_standby_follow()");
|
||||
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_hint(_("use \"repmgr node rejoin\" to re-add an inactive node to the replication cluster"));
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
log_verbose(LOG_INFO, _("connected to local node"));
|
||||
|
||||
@@ -2218,7 +2257,7 @@ do_standby_follow(void)
|
||||
|
||||
if (config_file_options.use_replication_slots)
|
||||
{
|
||||
int free_slots = get_free_replication_slots(primary_conn);
|
||||
int free_slots = get_free_replication_slot_count(primary_conn);
|
||||
if (free_slots < 0)
|
||||
{
|
||||
log_error(_("unable to determine number of free replication slots on the primary"));
|
||||
@@ -2313,6 +2352,74 @@ do_standby_follow(void)
|
||||
&follow_output,
|
||||
&follow_error_code);
|
||||
|
||||
/* unable to restart the standby */
|
||||
if (success == false)
|
||||
{
|
||||
create_event_notification_extended(
|
||||
primary_conn,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"standby_follow",
|
||||
success,
|
||||
follow_output.data,
|
||||
&event_info);
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
log_notice(_("STANDBY FOLLOW failed"));
|
||||
if (strlen( follow_output.data ))
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(follow_error_code);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
|
||||
initPQExpBuffer(&follow_output);
|
||||
|
||||
/*
|
||||
* Wait up to "standby_follow_timeout" seconds for standby to connect to
|
||||
* upstream.
|
||||
* For 9.6 and later, we could check pg_stat_wal_receiver on the local node.
|
||||
*/
|
||||
|
||||
/* assume success, necessary if standby_follow_timeout is zero */
|
||||
success = true;
|
||||
|
||||
for (timer = 0; timer < config_file_options.standby_follow_timeout; timer++)
|
||||
{
|
||||
success = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
||||
if (success == true)
|
||||
break;
|
||||
|
||||
log_verbose(LOG_DEBUG, "sleeping %i of max %i seconds waiting for standby to attach to primary",
|
||||
timer + 1,
|
||||
config_file_options.standby_follow_timeout);
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
if (success == true)
|
||||
{
|
||||
log_notice(_("STANDBY FOLLOW successful"));
|
||||
appendPQExpBuffer(&follow_output,
|
||||
"standby attached to upstream node \"%s\" (node ID: %i)",
|
||||
primary_node_record.node_name,
|
||||
primary_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("STANDBY FOLLOW failed"));
|
||||
appendPQExpBuffer(&follow_output,
|
||||
"standby did not attach to upstream node \"%s\" (node ID: %i) after %i seconds",
|
||||
primary_node_record.node_name,
|
||||
primary_node_id,
|
||||
config_file_options.standby_follow_timeout);
|
||||
|
||||
}
|
||||
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
create_event_notification_extended(
|
||||
primary_conn,
|
||||
&config_file_options,
|
||||
@@ -2324,20 +2431,11 @@ do_standby_follow(void)
|
||||
|
||||
PQfinish(primary_conn);
|
||||
|
||||
if (success == false)
|
||||
{
|
||||
log_notice(_("STANDBY FOLLOW failed"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
exit(follow_error_code);
|
||||
}
|
||||
|
||||
log_notice(_("STANDBY FOLLOW successful"));
|
||||
log_detail("%s", follow_output.data);
|
||||
|
||||
termPQExpBuffer(&follow_output);
|
||||
|
||||
if (success == false)
|
||||
exit(ERR_FOLLOW_FAIL);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3335,8 +3433,6 @@ do_standby_switchover(void)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* check there are sufficient free walsenders - obviously there's potential
|
||||
* for a later race condition if some walsenders come into use before the
|
||||
@@ -3760,7 +3856,6 @@ do_standby_switchover(void)
|
||||
* If --siblings-follow specified, attempt to make them follow the new
|
||||
* primary
|
||||
*/
|
||||
|
||||
if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
|
||||
{
|
||||
int failed_follow_count = 0;
|
||||
@@ -3787,8 +3882,17 @@ do_standby_switchover(void)
|
||||
initPQExpBuffer(&remote_command_str);
|
||||
make_remote_repmgr_path(&remote_command_str, &sibling_node_record);
|
||||
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
|
||||
if (sibling_node_record.type == WITNESS)
|
||||
{
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
|
||||
local_node_record.conninfo);
|
||||
}
|
||||
else
|
||||
{
|
||||
appendPQExpBuffer(&remote_command_str,
|
||||
"standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
|
||||
}
|
||||
get_conninfo_value(cell->node_info->conninfo, "host", host);
|
||||
log_debug("executing:\n %s", remote_command_str.data);
|
||||
|
||||
@@ -3803,8 +3907,16 @@ do_standby_switchover(void)
|
||||
|
||||
if (success == false || command_output.data[0] == '0')
|
||||
{
|
||||
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
|
||||
cell->node_info->node_name);
|
||||
if (sibling_node_record.type == WITNESS)
|
||||
{
|
||||
log_warning(_("WITNESS REGISTER failed on node \"%s\""),
|
||||
cell->node_info->node_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
|
||||
cell->node_info->node_name);
|
||||
}
|
||||
failed_follow_count++;
|
||||
}
|
||||
|
||||
@@ -3909,6 +4021,8 @@ check_source_server()
|
||||
PGconn *privileged_conn = NULL;
|
||||
|
||||
char cluster_size[MAXLEN];
|
||||
char *connstr = NULL;
|
||||
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
||||
@@ -3917,8 +4031,11 @@ check_source_server()
|
||||
log_verbose(LOG_DEBUG, "check_source_server()");
|
||||
log_info(_("connecting to source node"));
|
||||
|
||||
source_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
connstr = param_list_to_string(&source_conninfo);
|
||||
log_detail(_("connection string is: %s"), connstr);
|
||||
pfree(connstr);
|
||||
|
||||
source_conn = establish_db_connection_by_params(&source_conninfo, false);
|
||||
/*
|
||||
* Unless in barman mode, exit with an error;
|
||||
* establish_db_connection_by_params() will have already logged an error
|
||||
@@ -4073,13 +4190,25 @@ check_source_server()
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
t_conninfo_param_list upstream_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||
char *upstream_conninfo_user;
|
||||
|
||||
initialize_conninfo_params(&upstream_conninfo, false);
|
||||
parse_conninfo_string(node_record.conninfo, &upstream_conninfo, NULL, false);
|
||||
|
||||
strncpy(recovery_conninfo_str, node_record.conninfo, MAXLEN);
|
||||
strncpy(upstream_repluser, node_record.repluser, NAMEDATALEN);
|
||||
strncpy(upstream_user, param_get(&upstream_conninfo, "user"), NAMEDATALEN);
|
||||
|
||||
upstream_conninfo_user = param_get(&upstream_conninfo, "user");
|
||||
if (upstream_conninfo_user != NULL)
|
||||
{
|
||||
strncpy(upstream_user, upstream_conninfo_user, NAMEDATALEN);
|
||||
}
|
||||
else
|
||||
{
|
||||
get_conninfo_default_value("user", upstream_user, NAMEDATALEN);
|
||||
}
|
||||
|
||||
log_verbose(LOG_DEBUG, "upstream_user is \"%s\"", upstream_user);
|
||||
|
||||
upstream_conninfo_found = true;
|
||||
}
|
||||
@@ -4632,7 +4761,7 @@ initialise_direct_clone(t_node_info *node_record)
|
||||
}
|
||||
else
|
||||
{
|
||||
TablespaceListCell *cell = false;
|
||||
TablespaceListCell *cell;
|
||||
KeyValueList not_found = {NULL, NULL};
|
||||
int total = 0,
|
||||
matched = 0;
|
||||
@@ -5690,7 +5819,7 @@ get_barman_property(char *dst, char *name, char *local_repmgr_directory)
|
||||
|
||||
|
||||
static void
|
||||
copy_configuration_files(void)
|
||||
copy_configuration_files(bool delete_after_copy)
|
||||
{
|
||||
int i,
|
||||
r;
|
||||
@@ -5735,13 +5864,35 @@ copy_configuration_files(void)
|
||||
r = copy_remote_files(runtime_options.host, runtime_options.remote_user,
|
||||
file->filepath, dest_path.data, false, source_server_version_num);
|
||||
|
||||
termPQExpBuffer(&dest_path);
|
||||
/*
|
||||
* TODO: collate errors into list
|
||||
*/
|
||||
|
||||
if (WEXITSTATUS(r))
|
||||
{
|
||||
log_error(_("standby clone: unable to copy config file \"%s\""),
|
||||
file->filename);
|
||||
log_hint(_("see preceding messages for details"));
|
||||
|
||||
if (runtime_options.force == false)
|
||||
exit(ERR_BAD_RSYNC);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is to check we can actually copy the files before running the
|
||||
* main clone operation
|
||||
*/
|
||||
if (delete_after_copy == true)
|
||||
{
|
||||
/* this is very unlikely to happen, but log in case it does */
|
||||
if (unlink(dest_path.data) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_warning(_("unable to delete %s"), dest_path.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
termPQExpBuffer(&dest_path);
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
@@ -137,7 +137,7 @@ do_witness_register(void)
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO:sanity check witness node is not part of main cluster; we could
|
||||
* TODO: sanity check witness node is not part of main cluster; we could
|
||||
* add a random application_name to the respective connections,
|
||||
* and do a simple check of pg_stat_activity
|
||||
*/
|
||||
@@ -193,8 +193,26 @@ do_witness_register(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that an active node with the same node_name doesn't exist already
|
||||
*/
|
||||
|
||||
// XXX check other node with same name does not exist
|
||||
record_status = get_node_record_by_name(primary_conn,
|
||||
config_file_options.node_name,
|
||||
&node_record);
|
||||
|
||||
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
if (node_record.active == true && node_record.node_id != config_file_options.node_id)
|
||||
{
|
||||
log_error(_("node %i exists already with node_name \"%s\""),
|
||||
node_record.node_id,
|
||||
config_file_options.node_name);
|
||||
PQfinish(primary_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* if repmgr.nodes contains entries, delete if -F/--force provided,
|
||||
@@ -225,6 +243,7 @@ do_witness_register(void)
|
||||
PQfinish(witness_conn);
|
||||
exit(SUCCESS);
|
||||
}
|
||||
|
||||
/* create record on primary */
|
||||
|
||||
/*
|
||||
|
||||
@@ -634,7 +634,7 @@ main(int argc, char **argv)
|
||||
* If -d/--dbname appears to be a conninfo string, validate by attempting
|
||||
* to parse it (and if successful, store the parsed parameters)
|
||||
*/
|
||||
if (runtime_options.dbname)
|
||||
if (runtime_options.dbname[0])
|
||||
{
|
||||
if (strncmp(runtime_options.dbname, "postgresql://", 13) == 0 ||
|
||||
strncmp(runtime_options.dbname, "postgres://", 11) == 0 ||
|
||||
@@ -1010,7 +1010,6 @@ main(int argc, char **argv)
|
||||
runtime_options.output_mode = OM_OPTFORMAT;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check for configuration file items which can be overridden by runtime
|
||||
* options
|
||||
@@ -1068,6 +1067,17 @@ main(int argc, char **argv)
|
||||
if (runtime_options.terse)
|
||||
logger_set_terse();
|
||||
|
||||
/*
|
||||
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
||||
* of what's in the configuration file or -L/--log-level parameter, otherwise
|
||||
* some output might not be displayed.
|
||||
*/
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
logger_set_min_level(LOG_INFO);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Node configuration information is not needed for all actions, with
|
||||
* STANDBY CLONE being the main exception.
|
||||
|
||||
@@ -98,7 +98,7 @@
|
||||
#log_facility=STDERR # Logging facility: possible values are STDERR, or for
|
||||
# syslog integration, one of LOCAL0, LOCAL1, ..., LOCAL7, USER
|
||||
|
||||
#log_file='' # stderr can be redirected to an arbitrary file:
|
||||
#log_file='' # stderr can be redirected to an arbitrary file
|
||||
#log_status_interval=300 # interval (in seconds) for repmgrd to log a status message
|
||||
|
||||
|
||||
@@ -213,8 +213,10 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# These settings apply when instructing a standby to follow the new primary
|
||||
# ("repmgr standby follow").
|
||||
|
||||
#primary_follow_timeout=60 # The length of time (in seconds) to wait
|
||||
#primary_follow_timeout=60 # The max length of time (in seconds) to wait
|
||||
# for the new primary to become available
|
||||
#standby_follow_timeout=30 # The max length of time (in seconds) to wait
|
||||
# for the standby to connect to the primary
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
@@ -251,11 +253,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
# primary (or other upstream node)
|
||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||
# primary (or other upstream node)
|
||||
#promote_command= # command to execute when promoting a new primary; use something like:
|
||||
#promote_command= # command repmgrd executes when promoting a new primary; use something like:
|
||||
#
|
||||
# repmgr standby promote -f /etc/repmgr.conf
|
||||
#
|
||||
#follow_command= # command to execute when instructing a standby to follow a new primary;
|
||||
#follow_command= # command repmgrd executes when instructing a standby to follow a new primary;
|
||||
# use something like:
|
||||
#
|
||||
# repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
|
||||
@@ -308,11 +310,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||
#service_stop_command = ''
|
||||
#service_restart_command = ''
|
||||
#service_reload_command = ''
|
||||
#service_promote_command = '' # Note: this overrides any value contained in the setting
|
||||
# "promote_command". This is intended for systems which
|
||||
# provide a package-level promote command, such as Debian's
|
||||
# "pg_ctlcluster"
|
||||
|
||||
#service_promote_command = '' # This parameter is intended for systems which provide a
|
||||
# package-level promote command, such as Debian's
|
||||
# "pg_ctlcluster". *IMPORTANT*: it is *not* a substitute
|
||||
# for "promote_command"; do not use "repmgr standby promote"
|
||||
# (or a script which executes "repmgr standby promote") here.
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Status check thresholds
|
||||
|
||||
1
repmgr.h
1
repmgr.h
@@ -70,6 +70,7 @@
|
||||
#define DEFAULT_ASYNC_QUERY_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
||||
#define DEFAULT_STANDBY_FOLLOW_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
|
||||
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
||||
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "4.0.5"
|
||||
#define REPMGR_VERSION "4.0.6"
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ static FailoverState failover_state = FAILOVER_STATE_UNKNOWN;
|
||||
|
||||
static int primary_node_id = UNKNOWN_NODE_ID;
|
||||
static t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
||||
static NodeInfoList standby_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
static NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
|
||||
|
||||
static ElectionResult do_election(void);
|
||||
@@ -816,6 +816,29 @@ monitor_streaming_standby(void)
|
||||
{
|
||||
int degraded_monitoring_elapsed = calculate_elapsed(degraded_monitoring_start);
|
||||
|
||||
if (config_file_options.degraded_monitoring_timeout > 0
|
||||
&& degraded_monitoring_elapsed > config_file_options.degraded_monitoring_timeout)
|
||||
{
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("degraded monitoring timeout (%i seconds) exceeded, terminating"),
|
||||
degraded_monitoring_elapsed);
|
||||
|
||||
log_notice("%s", event_details.data);
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_shutdown",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
terminate(ERR_MONITORING_TIMEOUT);
|
||||
}
|
||||
|
||||
|
||||
log_debug("monitoring node %i in degraded state for %i seconds",
|
||||
upstream_node_info.node_id,
|
||||
degraded_monitoring_elapsed);
|
||||
@@ -918,8 +941,8 @@ monitor_streaming_standby(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
former_upstream_node_id,
|
||||
&standby_nodes);
|
||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
||||
&sibling_nodes);
|
||||
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||
|
||||
/* this will restart monitoring in primary mode */
|
||||
monitoring_state = MS_NORMAL;
|
||||
@@ -958,12 +981,12 @@ monitor_streaming_standby(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
local_node_info.upstream_node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* skip local node check, we did that above */
|
||||
if (cell->node_info->node_id == local_node_info.node_id)
|
||||
@@ -993,7 +1016,7 @@ monitor_streaming_standby(void)
|
||||
follow_new_primary(follow_node_id);
|
||||
}
|
||||
}
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1395,12 +1418,12 @@ monitor_streaming_witness(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
local_node_info.upstream_node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_debug("scanning %i node records to detect new primary...", standby_nodes.node_count);
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
log_debug("scanning %i node records to detect new primary...", sibling_nodes.node_count);
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* skip local node check, we did that above */
|
||||
if (cell->node_info->node_id == local_node_info.node_id)
|
||||
@@ -1430,7 +1453,7 @@ monitor_streaming_witness(void)
|
||||
witness_follow_new_primary(follow_node_id);
|
||||
}
|
||||
}
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
}
|
||||
}
|
||||
loop:
|
||||
@@ -1531,7 +1554,7 @@ do_primary_failover(void)
|
||||
}
|
||||
else if (election_result == ELECTION_WON)
|
||||
{
|
||||
if (standby_nodes.node_count > 0)
|
||||
if (sibling_nodes.node_count > 0)
|
||||
{
|
||||
log_notice("this node is the winner, will now promote itself and inform other nodes");
|
||||
}
|
||||
@@ -1576,7 +1599,7 @@ do_primary_failover(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
upstream_node_info.node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
}
|
||||
else if (config_file_options.failover == FAILOVER_MANUAL)
|
||||
@@ -1638,10 +1661,10 @@ do_primary_failover(void)
|
||||
{
|
||||
case FAILOVER_STATE_PROMOTED:
|
||||
/* notify former siblings that they should now follow this node */
|
||||
notify_followers(&standby_nodes, local_node_info.node_id);
|
||||
notify_followers(&sibling_nodes, local_node_info.node_id);
|
||||
|
||||
/* we no longer care about our former siblings */
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
|
||||
/* pass control back down to start_monitoring() */
|
||||
log_info(_("switching to primary monitoring mode"));
|
||||
@@ -1655,10 +1678,10 @@ do_primary_failover(void)
|
||||
* notify siblings that they should resume following the original
|
||||
* primary
|
||||
*/
|
||||
notify_followers(&standby_nodes, upstream_node_info.node_id);
|
||||
notify_followers(&sibling_nodes, upstream_node_info.node_id);
|
||||
|
||||
/* we no longer care about our former siblings */
|
||||
clear_node_info_list(&standby_nodes);
|
||||
clear_node_info_list(&sibling_nodes);
|
||||
|
||||
/* pass control back down to start_monitoring() */
|
||||
log_info(_("resuming standby monitoring mode"));
|
||||
@@ -2543,6 +2566,7 @@ do_election(void)
|
||||
|
||||
/* we're visible */
|
||||
int visible_nodes = 1;
|
||||
int total_nodes = 0;
|
||||
|
||||
NodeInfoListCell *cell = NULL;
|
||||
|
||||
@@ -2593,14 +2617,16 @@ do_election(void)
|
||||
get_active_sibling_node_records(local_conn,
|
||||
local_node_info.node_id,
|
||||
upstream_node_info.node_id,
|
||||
&standby_nodes);
|
||||
&sibling_nodes);
|
||||
|
||||
total_nodes = sibling_nodes.node_count + 1;
|
||||
|
||||
log_debug("do_election(): primary location is %s", upstream_node_info.location);
|
||||
|
||||
local_node_info.last_wal_receive_lsn = InvalidXLogRecPtr;
|
||||
|
||||
/* fast path if no other standbys (or witness) exist - normally win by default */
|
||||
if (standby_nodes.node_count == 0)
|
||||
if (sibling_nodes.node_count == 0)
|
||||
{
|
||||
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
||||
{
|
||||
@@ -2628,7 +2654,7 @@ do_election(void)
|
||||
}
|
||||
else
|
||||
{
|
||||
/* standby nodes found - check if we're in the primary location befor checking theirs */
|
||||
/* standby nodes found - check if we're in the primary location before checking theirs */
|
||||
if (strncmp(upstream_node_info.location, local_node_info.location, MAXLEN) == 0)
|
||||
{
|
||||
primary_location_seen = true;
|
||||
@@ -2643,7 +2669,7 @@ do_election(void)
|
||||
/* pointer to "winning" node, initially self */
|
||||
candidate_node = &local_node_info;
|
||||
|
||||
for (cell = standby_nodes.head; cell; cell = cell->next)
|
||||
for (cell = sibling_nodes.head; cell; cell = cell->next)
|
||||
{
|
||||
/* assume the worst case */
|
||||
cell->node_info->node_status = NODE_STATUS_UNKNOWN;
|
||||
@@ -2698,7 +2724,7 @@ do_election(void)
|
||||
candidate_node = cell->node_info;
|
||||
}
|
||||
/* LSN is same - tiebreak on priority, then node_id */
|
||||
else if(cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
||||
else if (cell->node_info->last_wal_receive_lsn == candidate_node->last_wal_receive_lsn)
|
||||
{
|
||||
log_verbose(LOG_DEBUG, "node %i has same LSN as current candidate %i",
|
||||
cell->node_info->node_id,
|
||||
@@ -2750,9 +2776,9 @@ do_election(void)
|
||||
|
||||
log_debug("visible nodes: %i; total nodes: %i",
|
||||
visible_nodes,
|
||||
standby_nodes.node_count);
|
||||
total_nodes);
|
||||
|
||||
if (visible_nodes <= (standby_nodes.node_count / 2.0))
|
||||
if (visible_nodes <= (total_nodes / 2.0))
|
||||
{
|
||||
log_notice(_("unable to reach a qualified majority of nodes"));
|
||||
log_detail(_("node will enter degraded monitoring state waiting for reconnect"));
|
||||
|
||||
Reference in New Issue
Block a user