Compare commits

...

16 Commits

Author SHA1 Message Date
Ian Barwick
ee1a6f9d0f doc: add a link to the current documentation from the contents page 2019-04-03 10:48:36 +09:00
Ian Barwick
49eb408873 doc: fix typo
Per user report on mailing list.
2018-10-23 09:01:00 +09:00
Ian Barwick
fba3d29514 doc: clarify BDR repmgrd configuration
Link directly to section about configuring the "event_notification_command".
2018-07-23 13:23:28 +09:00
Ian Barwick
77200e5030 doc: remove duplicate item in list of event notifications 2018-07-18 16:11:18 +09:00
Ian Barwick
4589b8d439 doc: update documentation of "promote_command" and "service_promote_command"
See commit 63242e2277
2018-07-16 14:55:07 +09:00
Ian Barwick
048f7c3310 doc: add extra emphasis about not running repmgrd during switchover
One day this will no longer be an issue, until then let's hope the
fine documentation is read.
2018-07-11 09:55:37 +09:00
Ian Barwick
1e5f63792f node check: implement CSV output
This is advertised in the --help output and placeholder code was in
place, but it wasn't actually implemented.
2018-06-22 15:46:50 +09:00
Ian Barwick
d26989bd12 node status: improve output and documentation
In the default text output mode, list inactive slots.

In CSV output mode, list inactive slots as additional information;
add output line with number of missing slots and a list thereof.

Also document --csv output mode.
2018-06-22 15:46:44 +09:00
Ian Barwick
f999c810a7 node check: clarify status information for witness server
Previously the output gave the impression the server was a primary,
which is technically the case, but it's not the actual cluster primary.

Also output an error if the node is in recovery, which is unlikely but
you never know.
2018-06-22 15:46:40 +09:00
Ian Barwick
81077d4bc2 standby switchover: fix behaviour if witness node is a sibling
The witness node is not a streaming replication standby, so executing
"repmgr standby follow" will fail. Instead, execute "repmgr witness
register --force" to update the witness node record on the primary and
its local copy of all node records.

Addresses GitHub #453.
2018-06-21 17:16:18 +09:00
Ian Barwick
a549941d4f repmgr: don't count witness node as a standby when running "node status"
Addresses GitHub #451.
2018-06-21 14:27:47 +09:00
Ian Barwick
2f6c159f9a "repmgr node ...": update comments and formatting 2018-06-21 14:27:42 +09:00
Ian Barwick
2eca1a0311 repmgr: don't count witness node as a standby when running "node check"
Addresses GitHub #451.
2018-06-21 11:31:09 +09:00
Ian Barwick
f6377084ec doc: remove info about old RPM package repository 2018-06-15 11:14:10 +09:00
Ian Barwick
d85c02b92b doc: finalize release notes 2018-06-15 10:52:51 +09:00
Ian Barwick
d9ba41fc35 doc: emphasize that repmgrd should not be running during a switchover 2018-06-11 15:31:22 +09:00
17 changed files with 673 additions and 487 deletions

View File

@@ -2178,8 +2178,9 @@ get_downstream_nodes_with_missing_slot(PGconn *conn, int this_node_id, NodeInfoL
"LEFT JOIN pg_catalog.pg_replication_slots rs "
" ON rs.slot_name = n.slot_name "
" WHERE n.slot_name IS NOT NULL"
" AND rs.slot_name IS NULL "
" AND n.upstream_node_id = %i ",
" AND rs.slot_name IS NULL "
" AND n.upstream_node_id = %i "
" AND n.type = 'standby'",
this_node_id);
log_verbose(LOG_DEBUG, "get_all_node_records_with_missing_slot():\n%s", query.data);
@@ -2916,8 +2917,7 @@ get_datadir_configuration_files(PGconn *conn, KeyValueList *list)
for (i = 0; i < PQntuples(res); i++)
{
key_value_list_set(
list,
key_value_list_set(list,
PQgetvalue(res, i, 1),
PQgetvalue(res, i, 0));
}
@@ -3654,7 +3654,7 @@ get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record)
int
get_free_replication_slots(PGconn *conn)
get_free_replication_slot_count(PGconn *conn)
{
PQExpBufferData query;
PGresult *res = NULL;
@@ -3691,6 +3691,47 @@ get_free_replication_slots(PGconn *conn)
}
/*
 * get_inactive_replication_slots()
 *
 * Query pg_catalog.pg_replication_slots over "conn" for all slots whose
 * "active" flag is false, ordered by slot name. For each row returned, the
 * slot name (column 0) and slot type (column 1) are passed to
 * key_value_list_set() so the caller receives them via "list".
 *
 * Returns the number of inactive slots found, or -1 if the query did not
 * return PGRES_TUPLES_OK (in which case an error is logged and "list" is
 * not modified by this function).
 */
int
get_inactive_replication_slots(PGconn *conn, KeyValueList *list)
{
	PQExpBufferData slot_query;
	PGresult   *result = NULL;
	int			slot_count = 0;
	int			row = 0;

	initPQExpBuffer(&slot_query);

	appendPQExpBuffer(&slot_query,
					  " SELECT slot_name, slot_type "
					  " FROM pg_catalog.pg_replication_slots "
					  " WHERE active IS FALSE "
					  " ORDER BY slot_name ");

	result = PQexec(conn, slot_query.data);

	/* the query text is no longer needed once it has been executed */
	termPQExpBuffer(&slot_query);

	if (PQresultStatus(result) != PGRES_TUPLES_OK)
	{
		log_error(_("unable to execute replication slot query"));
		log_detail("%s", PQerrorMessage(conn));
		PQclear(result);

		return -1;
	}

	slot_count = PQntuples(result);

	/* record each inactive slot: column 0 is slot_name, column 1 is slot_type */
	while (row < slot_count)
	{
		key_value_list_set(list,
						   PQgetvalue(result, row, 0),
						   PQgetvalue(result, row, 1));
		row++;
	}

	PQclear(result);

	return slot_count;
}
/* ==================== */
/* tablespace functions */
/* ==================== */

View File

@@ -455,7 +455,8 @@ void create_slot_name(char *slot_name, int node_id);
bool create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, PQExpBufferData *error_msg);
bool drop_replication_slot(PGconn *conn, char *slot_name);
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
int get_free_replication_slots(PGconn *conn);
int get_free_replication_slot_count(PGconn *conn);
int get_inactive_replication_slots(PGconn *conn, KeyValueList *list);
/* tablespace functions */
bool get_tablespace_name_by_location(PGconn *conn, const char *location, char *name);

View File

@@ -17,7 +17,7 @@
<sect1 id="release-4.0.6">
<title>Release 4.0.6</title>
<para><emphasis>June ??, 2018</emphasis></para>
<para><emphasis>June 14, 2018</emphasis></para>
<para>
&repmgr; 4.0.6 contains a number of bug fixes and usability enhancements.
</para>
@@ -58,6 +58,23 @@
</para>
</listitem>
<listitem>
<para>
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
Improve documentation of <option>--recovery-conf-only</option> mode
(GitHub #438)
</para>
</listitem>
<listitem>
<para>
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
Don't require presence of <varname>user</varname> parameter in conninfo string
(GitHub #437)
</para>
</listitem>
</itemizedlist>
</para>
</sect2>
@@ -71,7 +88,7 @@
<listitem>
<para>
<command><link linkend="repmgr-witness-register">repmgr witness register</link></command>:
prevent registration of a witness server with the same name as an existing node.
prevent registration of a witness server with the same name as an existing node
</para>
</listitem>
@@ -84,23 +101,6 @@
</para>
</listitem>
<listitem>
<para>
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
Don't require presence of <varname>user</varname> parameter in conninfo string
(GitHub #437)
</para>
</listitem>
<listitem>
<para>
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
Improve documentation of <option>--recovery-conf-only</option> mode
(GitHub #438)
</para>
</listitem>
<listitem>
<para>
<command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:

View File

@@ -33,34 +33,5 @@
</sect1>
<sect1 id="repmgr-rpm-key" xreflabel="repmgr rpm key">
<title>repmgr RPM signing key</title>
<para>
The signing key ID used for <application>repmgr</application> source code bundles is:
<ulink url="http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr">
<literal>0x702D883A</literal></ulink>.
</para>
<para>
To download the <application>repmgr</application> source key to your computer:
<programlisting>
curl -s http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr | gpg --import
gpg --fingerprint 0x702D883A
</programlisting>
then verify that the fingerprint is the expected value:
<programlisting>
AE4E 390E A58E 0037 6148 3F29 888D 018B 702D 883A</programlisting>
</para>
<para>
To check a repository RPM, use <application>rpmkeys</application> to load the
packaging signing key into the RPM database then use <literal>rpm -K</literal>, e.g.:
<programlisting>
sudo rpmkeys --import http://packages.2ndquadrant.com/repmgr/RPM-GPG-KEY-repmgr
rpm -K postgresql-bdr94-2ndquadrant-redhat-1.0-2.noarch.rpm
</programlisting>
</para>
</sect1>
</appendix>

View File

@@ -25,7 +25,7 @@
<note>
<para>
If using <application>systemd</application>, ensure you have <varname>RemoteIPC</varname> set to <literal>off</literal>.
If using <application>systemd</application>, ensure you have <varname>RemoveIPC</varname> set to <literal>off</literal>.
See the <ulink url="https://wiki.postgresql.org/wiki/Systemd">systemd</ulink>
entry in the <ulink url="https://wiki.postgresql.org/wiki/Main_Page">PostgreSQL wiki</ulink> for details.
</para>
@@ -47,16 +47,24 @@
service_restart_command
service_reload_command</programlisting>
</para>
<note>
<para>
It's also possible to specify a <varname>service_promote_command</varname>;
this overrides any value contained in the setting <varname>promote_command</varname>.
It's also possible to specify a <varname>service_promote_command</varname>.
This is intended for systems which provide a package-level promote command,
such as Debian's <application>pg_ctlcluster</application>.
such as Debian's <application>pg_ctlcluster</application>, to promote the
PostgreSQL instance from standby to primary.
</para>
<para>
If your packaging system does not provide such a command, it can be left empty,
and &repmgr; will generate the appropriate <command>pg_ctl ... promote</command> command.
</para>
<para>
Do not confuse this with <varname>promote_command</varname>, which is used
by <application>repmgrd</application> to execute <xref linkend="repmgr-standby-promote">.
</para>
</note>
<para>
To confirm which command &repmgr; will execute for each action, use
<command>repmgr node service --list --action=...</command>, e.g.:

View File

@@ -217,9 +217,6 @@
<listitem>
<simpara><literal>repmgrd_promote_error</literal></simpara>
</listitem>
<listitem>
<simpara><literal>repmgrd_failover_promote</literal></simpara>
</listitem>
<listitem>
<simpara><literal>bdr_failover</literal></simpara>
</listitem>

View File

@@ -79,9 +79,26 @@
</itemizedlist>
</para>
<para>
Individual checks can also be output in a Nagios-compatible format by additionally
providing the option <literal>--nagios</literal>.
</para>
</refsect1>
<refsect1>
<title>Output format</title>
<para>
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
<literal>--csv</literal>: generate output in CSV format (not available
for individual checks)
</simpara>
</listitem>
<listitem>
<simpara>
<literal>--nagios</literal>: generate output in a Nagios-compatible format
</simpara>
</listitem>
</itemizedlist>
</para>
</refsect1>
</refentry>

View File

@@ -24,7 +24,7 @@
<title>Example</title>
<para>
<programlisting>
$ repmgr -f /etc/repmgr.comf node status
$ repmgr -f /etc/repmgr.conf node status
Node "node1":
PostgreSQL version: 10beta1
Total data size: 30 MB
@@ -38,6 +38,20 @@
</para>
</refsect1>
<refsect1>
<title>Output format</title>
<para>
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
<literal>--csv</literal>: generate output in CSV format
</simpara>
</listitem>
</itemizedlist>
</para>
</refsect1>
<refsect1>
<title>See also</title>
<para>

View File

@@ -32,6 +32,7 @@
check the promotion every <varname>promote_check_interval</varname> seconds (default: 1 second).
Both values can be defined in <filename>repmgr.conf</filename>.
</para>
</refsect1>
<refsect1>

View File

@@ -12,6 +12,7 @@
<refpurpose>promote a standby to primary and demote the existing primary to a standby</refpurpose>
</refnamediv>
<refsect1>
<title>Description</title>
@@ -39,6 +40,14 @@
For more details on performing a switchover, including preparation and configuration,
see section <xref linkend="performing-switchover">.
</para>
<note>
<para>
<application>repmgrd</application> should not be active on any nodes while a switchover is being
executed. This restriction may be lifted in a later version.
</para>
</note>
</refsect1>
<refsect1>
@@ -171,10 +180,12 @@
Execute with the <literal>--dry-run</literal> option to test the switchover as far as
possible without actually changing the status of either node.
</para>
<para>
<application>repmgrd</application> should not be active on any nodes while a switchover is being
executed. This restriction may be lifted in a later version.
</para>
<important>
<para>
<application>repmgrd</application> must be shut down on all nodes while a switchover is being
executed. This restriction will be removed in a future &repmgr; version.
</para>
</important>
<para>
External database connections, e.g. from an application, should not be permitted while
the switchover is taking place. In particular, active transactions on the primary

View File

@@ -25,7 +25,13 @@
<para>
This is the official documentation of &repmgr; &repmgrversion; for
use with PostgreSQL 9.3 - PostgreSQL 10.
It describes the functionality supported by the current version of &repmgr;.
</para>
<para>
&repmgr; is being continually developed and we strongly recommend using the
latest version. Please check the
<ulink url="https://repmgr.org/">repmgr website</ulink> for details
about the current &repmgr; version as well as the
<ulink url="https://repmgr.org/docs/current/index.html">current documentation</ulink>.
</para>
<para>

View File

@@ -99,15 +99,16 @@
replication cluster. The database must be the BDR-enabled database.
</para>
<para>
If defined, the evenr <application>event_notifications</application> parameter
will restrict execution of <varname>event_notification_command</varname>
If defined, the <varname>event_notifications</varname> parameter will restrict
execution of the script defined in <varname>event_notification_command</varname>
to the specified event(s).
</para>
<note>
<simpara>
<varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
of reconfiguring the proxy server/ connection pooler. It is fully
user-definable; a reference implementation is documented below.
user-definable; see section <xref linkend="bdr-event-notification-command"> for a reference
implementation.
</simpara>
</note>
@@ -169,8 +170,8 @@
</para>
</sect1>
<sect1 id="bdr-event-notification-command" xreflabel="BDR failover event notification command">
<title>Defining the "event_notification_command"</title>
<sect1 id="bdr-event-notification-command" xreflabel="Defining the BDR failover &quot;event_notification_command&quot;">
<title>Defining the BDR failover "event_notification_command"</title>
<para>
Key to "failover" execution is the <literal>event_notification_command</literal>,
which is a user-definable script specified in <filename>repmgr.conf</filename>

View File

@@ -34,6 +34,24 @@
the <ulink url="https://www.postgresql.org/docs/current/static/runtime-config-client.html#GUC-SHARED-PRELOAD-LIBRARIES">PostgreSQL documentation</ulink>.
</para>
<para>
To apply configuration file changes to a running <application>repmgrd</application>
daemon, execute the operating system's <application>repmgrd</application> service reload command
(see <xref linkend="appendix-packages"> for examples),
or for instances which were manually started, execute <command>kill -HUP</command>, e.g.
<command>kill -HUP `cat /tmp/repmgrd.pid`</command>.
</para>
<note>
<para>
Check the <application>repmgrd</application> log to see what changes were
applied, or if any issues were encountered when reloading the configuration.
</para>
</note>
<para>
Note that only a subset of configuration file parameters can be changed on a
running <application>repmgrd</application> daemon.
</para>
<sect2 id="repmgrd-automatic-failover-configuration">
<title>automatic failover configuration</title>
<para>
@@ -162,13 +180,6 @@
repmgrd -f /etc/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize</programlisting>
and stopped with <command>kill `cat /tmp/repmgrd.pid`</command>. Adjust paths as appropriate.
</para>
<para>
To apply configuration file changes to a running <application>repmgrd</application>
daemon, execute the operating system's service reload command (for manually started
instances, execute <command>kill -HUP `cat /tmp/repmgrd.pid`</command>).
Note that only a subset of configuration file parameters can be changed on a
running <application>repmgrd</application> daemon.
</para>
<sect2 id="repmgrd-configuration-debian-ubuntu">
<indexterm>

View File

@@ -140,10 +140,12 @@
manually with <command>repmgr node check --archive-ready</command>.
</para>
<para>
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
promoting a node.
</para>
<note>
<para>
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
promoting a node. This restriction will be removed in a future &repmgr; version.
</para>
</note>
<para>
Finally, consider executing <command>repmgr standby switchover</command> with the

File diff suppressed because it is too large Load Diff

View File

@@ -2257,7 +2257,7 @@ do_standby_follow(void)
if (config_file_options.use_replication_slots)
{
int free_slots = get_free_replication_slots(primary_conn);
int free_slots = get_free_replication_slot_count(primary_conn);
if (free_slots < 0)
{
log_error(_("unable to determine number of free replication slots on the primary"));
@@ -3433,8 +3433,6 @@ do_standby_switchover(void)
}
}
/*
* check there are sufficient free walsenders - obviously there's potential
* for a later race condition if some walsenders come into use before the
@@ -3858,7 +3856,6 @@ do_standby_switchover(void)
* If --siblings-follow specified, attempt to make them follow the new
* primary
*/
if (runtime_options.siblings_follow == true && sibling_nodes.node_count > 0)
{
int failed_follow_count = 0;
@@ -3885,8 +3882,17 @@ do_standby_switchover(void)
initPQExpBuffer(&remote_command_str);
make_remote_repmgr_path(&remote_command_str, &sibling_node_record);
appendPQExpBuffer(&remote_command_str,
"standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
if (sibling_node_record.type == WITNESS)
{
appendPQExpBuffer(&remote_command_str,
"witness register -d \\'%s\\' --force 2>/dev/null && echo \"1\" || echo \"0\"",
local_node_record.conninfo);
}
else
{
appendPQExpBuffer(&remote_command_str,
"standby follow 2>/dev/null && echo \"1\" || echo \"0\"");
}
get_conninfo_value(cell->node_info->conninfo, "host", host);
log_debug("executing:\n %s", remote_command_str.data);
@@ -3901,8 +3907,16 @@ do_standby_switchover(void)
if (success == false || command_output.data[0] == '0')
{
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
cell->node_info->node_name);
if (sibling_node_record.type == WITNESS)
{
log_warning(_("WITNESS REGISTER failed on node \"%s\""),
cell->node_info->node_name);
}
else
{
log_warning(_("STANDBY FOLLOW failed on node \"%s\""),
cell->node_info->node_name);
}
failed_follow_count++;
}

View File

@@ -253,11 +253,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
# primary (or other upstream node)
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
# primary (or other upstream node)
#promote_command= # command to execute when promoting a new primary; use something like:
#promote_command= # command repmgrd executes when promoting a new primary; use something like:
#
# repmgr standby promote -f /etc/repmgr.conf
#
#follow_command= # command to execute when instructing a standby to follow a new primary;
#follow_command= # command repmgrd executes when instructing a standby to follow a new primary;
# use something like:
#
# repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
@@ -310,11 +310,11 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
#service_stop_command = ''
#service_restart_command = ''
#service_reload_command = ''
#service_promote_command = '' # Note: this overrides any value contained in the setting
# "promote_command". This is intended for systems which
# provide a package-level promote command, such as Debian's
# "pg_ctlcluster"
#service_promote_command = '' # This parameter is intended for systems which provide a
# package-level promote command, such as Debian's
# "pg_ctlcluster". *IMPORTANT*: it is *not* a substitute
# for "promote_command"; do not use "repmgr standby promote"
# (or a script which executes "repmgr standby promote") here.
#------------------------------------------------------------------------------
# Status check thresholds