Add functionality to "pause" repmgrd

In some circumstances, e.g. while performing a switchover, it is essential
that repmgrd does not take any kind of failover action, as this will put
the cluster into an incorrect state.

Previously it was necessary to stop repmgrd on all nodes (or at least
those nodes which repmgrd would consider as promotion candidates), however
this is a cumbersome and potentially risk-prone operation, particularly if the
replication cluster contains more than a couple of servers.

To prevent this issue from occurring, this patch introduces the ability
to "pause" repmgrd on all nodes with a single command ("repmgr daemon pause")
which notifies repmgrd not to take any failover action until the node
is "unpaused" ("repmgr daemon unpause").

"repmgr daemon status" provides an overview of each node and whether repmgrd
is running, and if so whether it is paused.

"repmgr standby switchover" has been modified to automatically pause repmgrd
while carrying out the switchover.

See documentation for further details.
This commit is contained in:
Ian Barwick
2018-09-27 16:42:10 +09:00
parent fce3c02760
commit 2491b8ae52
27 changed files with 1943 additions and 121 deletions

View File

@@ -17,7 +17,6 @@ DATA = \
repmgr--4.1--4.2.sql \
repmgr--4.2.sql
REGRESS = repmgr_extension
# Hacky workaround to install the binaries
@@ -43,7 +42,7 @@ $(info Building against PostgreSQL $(MAJORVERSION))
REPMGR_CLIENT_OBJS = repmgr-client.o \
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \
configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o
DATE=$(shell date "+%Y-%m-%d")
@@ -87,6 +86,7 @@ additional-clean:
rm -f repmgr-action-bdr.o
rm -f repmgr-action-node.o
rm -f repmgr-action-cluster.o
rm -f repmgr-action-daemon.o
rm -f repmgrd.o
rm -f repmgrd-physical.o
rm -f repmgrd-bdr.o

130
dbutils.c
View File

@@ -1627,7 +1627,6 @@ repmgrd_set_local_node_id(PGconn *conn, int local_node_id)
}
int
repmgrd_get_local_node_id(PGconn *conn)
{
@@ -1686,6 +1685,135 @@ server_in_exclusive_backup_mode(PGconn *conn)
}
/*
 * repmgrd_set_pid()
 *
 * Register repmgrd's process ID and pidfile path with the node reached via
 * "conn" by executing "SELECT repmgr.set_repmgrd_pid()".
 *
 * conn:        open connection to the node
 * repmgrd_pid: process ID to record
 * pidfile:     path of repmgrd's pidfile
 *
 * Failures are logged; nothing is returned to the caller.
 *
 * The pidfile path is quoted with PQescapeLiteral() rather than being placed
 * between hand-written single quotes, so a path containing a quote character
 * or backslash cannot break (or alter) the generated statement.
 */
void
repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile)
{
    PQExpBufferData query;
    PGresult   *res = NULL;
    char       *pidfile_literal = NULL;

    /*
     * Passing NULL to a "%s" conversion is undefined behaviour; fall back to
     * an empty string so the PID is still recorded.
     * NOTE(review): confirm an empty pidfile is the preferred representation
     * for "no pidfile" on the extension side.
     */
    const char *pidfile_value = (pidfile != NULL) ? pidfile : "";

    log_verbose(LOG_DEBUG, "repmgrd_set_pid(): pid is %i", (int) repmgrd_pid);

    /* The returned literal already includes the surrounding quotes */
    pidfile_literal = PQescapeLiteral(conn, pidfile_value, strlen(pidfile_value));

    if (pidfile_literal == NULL)
    {
        log_error(_("unable to escape pidfile path for \"SELECT repmgr.set_repmgrd_pid()\""));
        log_detail("%s", PQerrorMessage(conn));
        return;
    }

    initPQExpBuffer(&query);

    appendPQExpBuffer(&query,
                      "SELECT repmgr.set_repmgrd_pid(%i, %s)",
                      (int) repmgrd_pid, pidfile_literal);

    /* The literal has been copied into the query buffer */
    PQfreemem(pidfile_literal);

    res = PQexec(conn, query.data);
    termPQExpBuffer(&query);

    if (PQresultStatus(res) != PGRES_TUPLES_OK)
    {
        log_error(_("unable to execute \"SELECT repmgr.set_repmgrd_pid()\""));
        log_detail("%s", PQerrorMessage(conn));
    }

    PQclear(res);

    return;
}
/*
 * repmgrd_get_pid()
 *
 * Ask the node reached via "conn" for the repmgrd PID by executing
 * "SELECT repmgr.get_repmgrd_pid()".
 *
 * Returns the reported PID, or UNKNOWN_PID if the query fails (the failure
 * is logged) or the query result is NULL.
 */
pid_t
repmgrd_get_pid(PGconn *conn)
{
    pid_t       pid = UNKNOWN_PID;
    PGresult   *result = PQexec(conn, "SELECT repmgr.get_repmgrd_pid()");

    if (PQresultStatus(result) == PGRES_TUPLES_OK)
    {
        /* Only convert the value if the function returned non-NULL */
        if (PQgetisnull(result, 0, 0) == 0)
        {
            pid = atoi(PQgetvalue(result, 0, 0));
        }
    }
    else
    {
        log_error(_("unable to execute \"SELECT repmgr.get_repmgrd_pid()\""));
        log_detail("%s", PQerrorMessage(conn));
    }

    PQclear(result);

    return pid;
}
/*
 * repmgrd_is_running()
 *
 * Execute "SELECT repmgr.repmgrd_is_running()" on the node reached via
 * "conn" and return the result as a boolean.
 *
 * Returns false if the query fails (the failure is logged) or the query
 * result is NULL.
 */
bool
repmgrd_is_running(PGconn *conn)
{
    bool        running = false;
    PGresult   *result = NULL;

    result = PQexec(conn, "SELECT repmgr.repmgrd_is_running()");

    if (PQresultStatus(result) == PGRES_TUPLES_OK)
    {
        /* A NULL result leaves the default of "false" in place */
        if (PQgetisnull(result, 0, 0) == 0)
        {
            running = atobool(PQgetvalue(result, 0, 0));
        }
    }
    else
    {
        log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_running()\""));
        log_detail("%s", PQerrorMessage(conn));
    }

    PQclear(result);

    return running;
}
/*
 * repmgrd_is_paused()
 *
 * Execute "SELECT repmgr.repmgrd_is_paused()" on the node reached via
 * "conn" and return the result as a boolean.
 *
 * Returns false if the query fails (the failure is logged) or the query
 * result is NULL.
 */
bool
repmgrd_is_paused(PGconn *conn)
{
    bool        paused = false;
    PGresult   *result = PQexec(conn, "SELECT repmgr.repmgrd_is_paused()");
    bool        query_ok = (PQresultStatus(result) == PGRES_TUPLES_OK);

    if (!query_ok)
    {
        log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_paused()\""));
        log_detail("%s", PQerrorMessage(conn));
    }
    else if (PQgetisnull(result, 0, 0) == 0)
    {
        /* Non-NULL value returned: convert its text form to a boolean */
        paused = atobool(PQgetvalue(result, 0, 0));
    }

    PQclear(result);

    return paused;
}
/*
 * repmgrd_pause()
 *
 * Set the pause state on the node reached via "conn" by executing
 * "SELECT repmgr.repmgrd_pause(TRUE|FALSE)".
 *
 * conn:  open connection to the node
 * pause: true to pause, false to unpause
 *
 * Returns true if the query executed successfully, false otherwise (the
 * failure is logged).
 */
bool
repmgrd_pause(PGconn *conn, bool pause)
{
    PQExpBufferData sql;
    PGresult   *result = NULL;
    const char *pause_literal = "FALSE";
    bool        ok;

    if (pause == true)
    {
        pause_literal = "TRUE";
    }

    initPQExpBuffer(&sql);
    appendPQExpBuffer(&sql,
                      "SELECT repmgr.repmgrd_pause(%s)",
                      pause_literal);

    result = PQexec(conn, sql.data);

    /* The query text is no longer needed once it has been sent */
    termPQExpBuffer(&sql);

    ok = (PQresultStatus(result) == PGRES_TUPLES_OK);

    if (!ok)
    {
        log_error(_("unable to execute \"SELECT repmgr.repmgrd_pause()\""));
        log_detail("%s", PQerrorMessage(conn));
    }

    PQclear(result);

    return ok;
}
/* ================ */
/* result functions */
/* ================ */

View File

@@ -327,6 +327,21 @@ typedef struct
UNKNOWN_TIMELINE_ID, \
InvalidXLogRecPtr \
}
/*
 * Per-node record of repmgrd state. do_daemon_status() in
 * repmgr-action-daemon.c declares an array of pointers to these.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names and the columns shown by "repmgr daemon status"; confirm against
 * the code which populates the struct.
 */
typedef struct RepmgrdInfo {
/* presumably the ID of the node this record describes */
int node_id;
/* presumably the repmgrd process ID */
int pid;
/* presumably the PID formatted for display */
char pid_text[MAXLEN];
/* presumably the path of repmgrd's pidfile */
char pid_file[MAXLEN];
/* presumably whether PostgreSQL is running on the node */
bool pg_running;
/* presumably the display text for pg_running */
char pg_running_text[MAXLEN];
/* presumably whether repmgrd is running on the node */
bool running;
/* presumably the display text for "running" */
char repmgrd_running[MAXLEN];
/* presumably whether repmgrd is paused on the node */
bool paused;
} RepmgrdInfo;
/* global variables */
extern int server_version_num;
@@ -399,6 +414,11 @@ bool identify_system(PGconn *repl_conn, t_system_identification *identification
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
int repmgrd_get_local_node_id(PGconn *conn);
BackupState server_in_exclusive_backup_mode(PGconn *conn);
void repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile);
pid_t repmgrd_get_pid(PGconn *conn);
bool repmgrd_is_running(PGconn *conn);
bool repmgrd_is_paused(PGconn *conn);
bool repmgrd_pause(PGconn *conn, bool pause);
/* extension functions */
ExtensionStatus get_repmgr_extension_status(PGconn *conn);

View File

@@ -58,6 +58,7 @@
<!ENTITY repmgrd-cascading-replication SYSTEM "repmgrd-cascading-replication.sgml">
<!ENTITY repmgrd-network-split SYSTEM "repmgrd-network-split.sgml">
<!ENTITY repmgrd-witness-server SYSTEM "repmgrd-witness-server.sgml">
<!ENTITY repmgrd-pausing SYSTEM "repmgrd-pausing.sgml">
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.sgml">
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.sgml">
@@ -78,6 +79,9 @@
<!ENTITY repmgr-cluster-crosscheck SYSTEM "repmgr-cluster-crosscheck.sgml">
<!ENTITY repmgr-cluster-event SYSTEM "repmgr-cluster-event.sgml">
<!ENTITY repmgr-cluster-cleanup SYSTEM "repmgr-cluster-cleanup.sgml">
<!ENTITY repmgr-daemon-status SYSTEM "repmgr-daemon-status.sgml">
<!ENTITY repmgr-daemon-pause SYSTEM "repmgr-daemon-pause.sgml">
<!ENTITY repmgr-daemon-unpause SYSTEM "repmgr-daemon-unpause.sgml">
<!ENTITY appendix-release-notes SYSTEM "appendix-release-notes.sgml">
<!ENTITY appendix-faq SYSTEM "appendix-faq.sgml">

View File

@@ -90,7 +90,7 @@
<para>
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
outputs the replication cluster's status in a simple CSV format, suitable for
parsing by scripts:
parsing by scripts, e.g.:
<programlisting>
$ repmgr -f /etc/repmgr.conf cluster show --csv
1,-1,-1
@@ -165,7 +165,7 @@
<refsect1>
<title>See also</title>
<para>
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">
<xref linkend="repmgr-node-status">, <xref linkend="repmgr-node-check">, <xref linkend="repmgr-daemon-status">
</para>
</refsect1>

View File

@@ -0,0 +1,109 @@
<refentry id="repmgr-daemon-pause">
<indexterm>
<primary>repmgr daemon pause</primary>
</indexterm>
<refmeta>
<refentrytitle>repmgr daemon pause</refentrytitle>
</refmeta>
<refnamediv>
<refname>repmgr daemon pause</refname>
<refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to pause failover operations</refpurpose>
</refnamediv>
<refsect1>
<title>Description</title>
<para>
This command can be run on any active node in the replication cluster to instruct all
running <application>repmgrd</application> instances to &quot;pause&quot; themselves, i.e. take no
action (such as promoting themselves or following a new primary) if a failover event is detected.
</para>
<para>
This functionality is useful for performing maintenance operations, such as switchovers
or upgrades, which might otherwise trigger a failover if <application>repmgrd</application>
is running normally.
</para>
<note>
<para>
It's important to wait a few seconds after restarting PostgreSQL on any node before running
<command>repmgr daemon pause</command>, as the <application>repmgrd</application> instance
on the restarted node will take a second or two before it has updated its status.
</para>
</note>
<para>
<xref linkend="repmgr-daemon-unpause"> will instruct all previously paused <application>repmgrd</application>
instances to resume normal failover operation.
</para>
</refsect1>
<refsect1>
<title>Execution</title>
<para>
<command>repmgr daemon pause</command> can be executed on any active node in the
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
It will have no effect on previously paused nodes.
</para>
</refsect1>
<refsect1>
<title>Example</title>
<para>
<programlisting>
$ repmgr -f /etc/repmgr.conf daemon pause
NOTICE: node 1 (node1) paused
NOTICE: node 2 (node2) paused
NOTICE: node 3 (node3) paused</programlisting>
</para>
</refsect1>
<refsect1>
<title>Options</title>
<variablelist>
<varlistentry>
<term><option>--dry-run</option></term>
<listitem>
<para>
Check if nodes are reachable but don't pause <application>repmgrd</application>.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Exit codes</title>
<para>
The following exit codes can be emitted by <command>repmgr daemon pause</command>:
</para>
<variablelist>
<varlistentry>
<term><option>SUCCESS (0)</option></term>
<listitem>
<para>
<application>repmgrd</application> could be paused on all nodes.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>ERR_REPMGRD_PAUSE (26)</option></term>
<listitem>
<para>
<application>repmgrd</application> could not be paused on one or more nodes.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>See also</title>
<para>
<xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-daemon-status">
</para>
</refsect1>
</refentry>

View File

@@ -0,0 +1,165 @@
<refentry id="repmgr-daemon-status">
<indexterm>
<primary>repmgr daemon status</primary>
</indexterm>
<refmeta>
<refentrytitle>repmgr daemon status</refentrytitle>
</refmeta>
<refnamediv>
<refname>repmgr daemon status</refname>
<refpurpose>display information about the status of <application>repmgrd</application> on each node in the cluster</refpurpose>
</refnamediv>
<refsect1>
<title>Description</title>
<para>
This command provides an overview of all active nodes in the cluster and the state
of each node's <application>repmgrd</application> instance. It can be used to check
the result of <xref linkend="repmgr-daemon-pause"> and <xref linkend="repmgr-daemon-unpause">
operations.
</para>
</refsect1>
<refsect1>
<title>Execution</title>
<para>
<command>repmgr daemon status</command> can be executed on any active node in the
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
</para>
<note>
<para>
After restarting PostgreSQL on any node, the <application>repmgrd</application> instance
will take a second or two before it is able to update its status. Until then,
<application>repmgrd</application> will be shown as not running.
</para>
</note>
</refsect1>
<refsect1>
<title>Examples</title>
<para>
<application>repmgrd</application> running normally on all nodes:
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Status | repmgrd | PID | Paused?
----+-------+---------+---------+---------+------+---------
1 | node1 | primary | running | running | 7851 | no
2 | node2 | standby | running | running | 7889 | no
3 | node3 | standby | running | running | 7918 | no</programlisting>
</para>
<para>
<application>repmgrd</application> paused on all nodes (using <xref linkend="repmgr-daemon-pause">):
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Status | repmgrd | PID | Paused?
----+-------+---------+---------+---------+------+---------
1 | node1 | primary | running | running | 7851 | yes
2 | node2 | standby | running | running | 7889 | yes
3 | node3 | standby | running | running | 7918 | yes</programlisting>
</para>
<para>
<application>repmgrd</application> not running on one node:
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Status | repmgrd | PID | Paused?
----+-------+---------+---------+-------------+------+---------
1 | node1 | primary | running | running | 7851 | yes
2 | node2 | standby | running | not running | n/a | n/a
3 | node3 | standby | running | running | 7918 | yes</programlisting>
</para>
</refsect1>
<refsect1>
<title>Options</title>
<variablelist>
<varlistentry>
<term><option>--csv</option></term>
<listitem>
<para>
<command>repmgr daemon status</command> accepts an optional parameter <literal>--csv</literal>, which
outputs the replication cluster's status in a simple CSV format, suitable for
parsing by scripts, e.g.:
<programlisting>
$ repmgr -f /etc/repmgr.conf daemon status --csv
1,node1,primary,1,1,10204,1
2,node2,standby,1,0,-1,1
3,node3,standby,1,1,10225,1</programlisting>
</para>
<para>
The columns have the following meanings:
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
node ID
</simpara>
</listitem>
<listitem>
<simpara>
node name
</simpara>
</listitem>
<listitem>
<simpara>
node type (primary or standby)
</simpara>
</listitem>
<listitem>
<simpara>
PostgreSQL server running
</simpara>
</listitem>
<listitem>
<simpara>
<application>repmgrd</application> running (1 = running, 0 = not running)
</simpara>
</listitem>
<listitem>
<simpara>
<application>repmgrd</application> PID (-1 if not running)
</simpara>
</listitem>
<listitem>
<simpara>
<application>repmgrd</application> paused (1 = paused, 0 = not paused)
</simpara>
</listitem>
</itemizedlist>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--verbose</option></term>
<listitem>
<para>
Display the full text of any database connection error messages
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>See also</title>
<para>
<xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-unpause">, <xref linkend="repmgr-cluster-show">
</para>
</refsect1>
</refentry>

View File

@@ -0,0 +1,103 @@
<refentry id="repmgr-daemon-unpause">
<indexterm>
<primary>repmgr daemon unpause</primary>
</indexterm>
<refmeta>
<refentrytitle>repmgr daemon unpause</refentrytitle>
</refmeta>
<refnamediv>
<refname>repmgr daemon unpause</refname>
<refpurpose>Instruct all <application>repmgrd</application> instances in the replication cluster to resume failover operations</refpurpose>
</refnamediv>
<refsect1>
<title>Description</title>
<para>
This command can be run on any active node in the replication cluster to instruct all
running <application>repmgrd</application> instances to &quot;unpause&quot;
(following a previous execution of <xref linkend="repmgr-daemon-pause">)
and resume normal failover/monitoring operation.
</para>
<note>
<para>
It's important to wait a few seconds after restarting PostgreSQL on any node before running
<command>repmgr daemon unpause</command>, as the <application>repmgrd</application> instance
on the restarted node will take a second or two before it has updated its status.
</para>
</note>
</refsect1>
<refsect1>
<title>Execution</title>
<para>
<command>repmgr daemon unpause</command> can be executed on any active node in the
replication cluster. A valid <filename>repmgr.conf</filename> file is required.
It will have no effect on nodes which are not already paused.
</para>
</refsect1>
<refsect1>
<title>Example</title>
<para>
<programlisting>
$ repmgr -f /etc/repmgr.conf daemon unpause
NOTICE: node 1 (node1) unpaused
NOTICE: node 2 (node2) unpaused
NOTICE: node 3 (node3) unpaused</programlisting>
</para>
</refsect1>
<refsect1>
<title>Options</title>
<variablelist>
<varlistentry>
<term><option>--dry-run</option></term>
<listitem>
<para>
Check if nodes are reachable but don't unpause <application>repmgrd</application>.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Exit codes</title>
<para>
The following exit codes can be emitted by <command>repmgr daemon unpause</command>:
</para>
<variablelist>
<varlistentry>
<term><option>SUCCESS (0)</option></term>
<listitem>
<para>
<application>repmgrd</application> could be unpaused on all nodes.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>ERR_REPMGRD_PAUSE (26)</option></term>
<listitem>
<para>
<application>repmgrd</application> could not be unpaused on one or more nodes.
</para>
</listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>See also</title>
<para>
<xref linkend="repmgr-daemon-pause">, <xref linkend="repmgr-daemon-status">
</para>
</refsect1>
</refentry>

View File

@@ -35,6 +35,10 @@
&repmgr; will attempt to check for potential issues but cannot guarantee
a successful switchover.
</para>
<para>
&repmgr; will refuse to perform the switchover if an exclusive backup is running on
the current primary.
</para>
</note>
<para>
For more details on performing a switchover, including preparation and configuration,
@@ -43,11 +47,14 @@
<note>
<para>
<application>repmgrd</application> should not be active on any nodes while a switchover is being
executed. This restriction may be lifted in a later version.
From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
<application>repmgrd</application> instances to pause operations while the switchover
is being carried out, to prevent <application>repmgrd</application> from
unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
</para>
<para>
&repmgr; will not perform the switchover if an exclusive backup is running on the current primary.
Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
is not running on any nodes while a switchover is being executed.
</para>
</note>
@@ -61,8 +68,9 @@
<term><option>--always-promote</option></term>
<listitem>
<para>
Promote standby to primary, even if it is behind original primary
(original primary will be shut down in any case).
Promote standby to primary, even if it is behind or has diverged
from the original primary. The original primary will be shut down in any case,
and will need to be manually reintegrated into the replication cluster.
</para>
</listitem>
</varlistentry>
@@ -122,6 +130,21 @@
</listitem>
</varlistentry>
<varlistentry>
<term><option>--repmgrd-no-pause</option></term>
<listitem>
<para>
Don't pause <application>repmgrd</application> while executing a switchover.
</para>
<para>
This option should not be used unless you take steps by other means
to ensure <application>repmgrd</application> is paused or not
running on all nodes.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--siblings-follow</option></term>
<listitem>

View File

@@ -86,6 +86,7 @@
&repmgrd-cascading-replication;
&repmgrd-network-split;
&repmgrd-witness-server;
&repmgrd-pausing;
&repmgrd-degraded-monitoring;
&repmgrd-monitoring;
&repmgrd-bdr;
@@ -112,6 +113,9 @@
&repmgr-cluster-crosscheck;
&repmgr-cluster-event;
&repmgr-cluster-cleanup;
&repmgr-daemon-status;
&repmgr-daemon-pause;
&repmgr-daemon-unpause;
</part>
&appendix-release-notes;

169
doc/repmgrd-pausing.sgml Normal file
View File

@@ -0,0 +1,169 @@
<chapter id="repmgrd-pausing" xreflabel="Pausing repmgrd">
<indexterm>
<primary>repmgrd</primary>
<secondary>pausing</secondary>
</indexterm>
<indexterm>
<primary>pausing repmgrd</primary>
</indexterm>
<title>Pausing repmgrd</title>
<para>
In normal operation, <application>repmgrd</application> monitors the state of the
PostgreSQL node it is running on, and will take appropriate action if problems
are detected, e.g. (if so configured) promote the node to primary, if the existing
primary has been determined as failed.
</para>
<para>
However, <application>repmgrd</application> is unable to distinguish between
planned outages (such as performing a <link linkend="performing-switchover">switchover</link>
or upgrading a server), and an actual server outage. In versions prior to &repmgr; 4.2
it was necessary to stop <application>repmgrd</application> on all nodes (or at least
on all nodes where <application>repmgrd</application> is
<link linkend="repmgrd-automatic-failover">configured for automatic failover</link>)
to prevent <application>repmgrd</application> from making changes to the
replication cluster.
</para>
<para>
From <link linkend="release-4.2">&repmgr; 4.2</link>, <application>repmgrd</application>
can now be &quot;paused&quot;, i.e. instructed not to take any action such as performing a failover.
This can be done from any node in the cluster, removing the need to stop/restart
each <application>repmgrd</application> individually.
</para>
<sect1 id="repmgrd-pausing-prerequisites">
<title>Prerequisites for pausing <application>repmgrd</application></title>
<para>
In order to be able to pause/unpause <application>repmgrd</application>, the following
prerequisites must be met:
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara><link linkend="release-4.2">&repmgr; 4.2</link> or later must be installed on all nodes.</simpara>
</listitem>
<listitem>
<simpara>The same major &repmgr; version (e.g. 4.2) must be installed on all nodes (and preferably the same minor version).</simpara>
</listitem>
<listitem>
<simpara>
PostgreSQL on all nodes must be accessible from the node where the
<literal>pause</literal>/<literal>unpause</literal> operation is executed, using the
<varname>conninfo</varname> string shown by <link linkend="repmgr-cluster-show"><command>repmgr cluster show</command></link>.
</simpara>
</listitem>
</itemizedlist>
</para>
<note>
<para>
These conditions are required for normal &repmgr; operation in any case.
</para>
</note>
</sect1>
<sect1 id="repmgrd-pausing-execution">
<title>Pausing/unpausing <application>repmgrd</application></title>
<para>
To pause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link>, e.g.:
<programlisting>
$ repmgr -f /etc/repmgr.conf daemon pause
NOTICE: node 1 (node1) paused
NOTICE: node 2 (node2) paused
NOTICE: node 3 (node3) paused</programlisting>
</para>
<para>
The state of <application>repmgrd</application> on each node can be checked with
<link linkend="repmgr-daemon-status"><command>repmgr daemon status</command></link>, e.g.:
<programlisting>$ repmgr -f /etc/repmgr.conf daemon status
ID | Name | Role | Status | repmgrd | PID | Paused?
----+-------+---------+---------+---------+------+---------
1 | node1 | primary | running | running | 7851 | yes
2 | node2 | standby | running | running | 7889 | yes
3 | node3 | standby | running | running | 7918 | yes</programlisting>
</para>
<note>
<para>
If executing a switchover with <link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>,
&repmgr; will automatically pause/unpause <application>repmgrd</application> as part of the switchover process.
</para>
</note>
<para>
If the primary (in this example, <literal>node1</literal>) is stopped, <application>repmgrd</application>
running on one of the standbys (here: <literal>node2</literal>) will react like this:
<programlisting>
[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1)
[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts
[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt
...
[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt
[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts
[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts
[2018-09-20 12:22:25] [NOTICE] node is paused
[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state
[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator
[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode</programlisting>
</para>
<para>
If the primary becomes available again (e.g. following a software upgrade), <application>repmgrd</application>
will automatically reconnect, e.g.:
<programlisting>
[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring</programlisting>
</para>
<para>
To unpause <application>repmgrd</application>, execute <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>, e.g.:
<programlisting>
$ repmgr -f /etc/repmgr.conf daemon unpause
NOTICE: node 1 (node1) unpaused
NOTICE: node 2 (node2) unpaused
NOTICE: node 3 (node3) unpaused</programlisting>
</para>
<note>
<para>
If the previous primary is no longer accessible when <application>repmgrd</application>
is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>,
and any standbys attached to the new primary with
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>.
</para>
<para>
This is to prevent <link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
resulting in the automatic promotion of a new primary, which may be a problem particularly
in larger clusters, where <application>repmgrd</application> could select a different promotion
candidate to the one intended by the administrator.
</para>
</note>
<sect2 id="repmgrd-pausing-details">
<title>Details on the <application>repmgrd</application> pausing mechanism</title>
<para>
The pause state of each node is preserved across a PostgreSQL restart.
</para>
<para>
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link> can be
executed even if <application>repmgrd</application> is not running; in this case,
<application>repmgrd</application> will start up in whichever pause state has been set.
</para>
<note>
<para>
<link linkend="repmgr-daemon-pause"><command>repmgr daemon pause</command></link> and
<link linkend="repmgr-daemon-unpause"><command>repmgr daemon unpause</command></link>
<emphasis>do not</emphasis> stop/start <application>repmgrd</application>.
</para>
</note>
</sect2>
</sect1>
</chapter>

View File

@@ -19,9 +19,10 @@
</para>
<para>
<command>repmgr standby switchover</command> differs from other &repmgr;
actions in that it also performs actions on another server (the demotion
candidate), which means passwordless SSH access is required to that server
from the one where <command>repmgr standby switchover</command> is executed.
actions in that it also performs actions on other servers (the demotion
candidate, and optionally any other servers which are to follow the new primary),
which means passwordless SSH access is required to those servers from the one where
<command>repmgr standby switchover</command> is executed.
</para>
<note>
<simpara>
@@ -153,12 +154,18 @@
manually with <command>repmgr node check --archive-ready</command>.
</para>
<note>
<para>
Ensure that <application>repmgrd</application> is *not* running anywhere to prevent it unintentionally
promoting a node. This restriction will be removed in a future &repmgr; version.
</para>
</note>
<note>
<para>
From <link linkend="release-4.2">repmgr 4.2</link>, &repmgr; will instruct any running
<application>repmgrd</application> instances to pause operations while the switchover
is being carried out, to prevent <application>repmgrd</application> from
unintentionally promoting a node. For more details, see <xref linkend="repmgrd-pausing">.
</para>
<para>
Users of &repmgr; versions prior to 4.2 should ensure that <application>repmgrd</application>
is not running on any nodes while a switchover is being executed.
</para>
</note>
<para>
@@ -303,7 +310,21 @@
2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr
</programlisting>
</para>
<para>
If <application>repmgrd</application> is in use, it's worth double-checking that
all nodes are unpaused by executing <command><link linkend="repmgr-daemon-status">repmgr daemon status</link></command>.
</para>
<note>
<para>
Users of &repmgr; versions prior to 4.2 will need to manually restart <application>repmgrd</application>
on all nodes after the switchover is completed.
</para>
</note>
</sect1>
<sect1 id="switchover-caveats" xreflabel="Caveats">
<indexterm>
<primary>switchover</primary>
@@ -329,18 +350,8 @@
for details.
</simpara>
</listitem>
<listitem>
<simpara>
<application>repmgrd</application> should not be running with setting <varname>failover=automatic</varname>
in <filename>repmgr.conf</filename> when a switchover is carried out, otherwise the
<application>repmgrd</application> daemon may try and promote a standby by itself.
</simpara>
</listitem>
</itemizedlist>
</para>
<para>
We hope to remove some of these restrictions in future versions of &repmgr;.
</para>
</sect1>
<sect1 id="switchover-troubleshooting" xreflabel="Troubleshooting">

View File

@@ -47,5 +47,6 @@
#define ERR_FOLLOW_FAIL 23
#define ERR_REJOIN_FAIL 24
#define ERR_NODE_STATUS 25
#define ERR_REPMGRD_PAUSE 26
#endif /* _ERRCODE_H_ */

View File

@@ -145,6 +145,21 @@ CREATE FUNCTION unset_bdr_failover_handler()
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
LANGUAGE C STRICT;
-- repmgrd PID tracking functions, each implemented by the C symbol of the
-- same name in the extension module. All are declared STRICT, so a call
-- with a NULL argument returns NULL without invoking the C function.

-- Returns the recorded repmgrd PID as an integer.
CREATE FUNCTION get_repmgrd_pid()
RETURNS INT
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
LANGUAGE C STRICT;
-- Returns the recorded repmgrd pidfile as text.
CREATE FUNCTION get_repmgrd_pidfile()
RETURNS TEXT
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
LANGUAGE C STRICT;
-- Takes a PID (INT) and a pidfile (TEXT); returns nothing.
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
RETURNS VOID
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
LANGUAGE C STRICT;
CREATE VIEW repmgr.replication_status AS
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,

View File

@@ -145,6 +145,36 @@ CREATE FUNCTION unset_bdr_failover_handler()
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
LANGUAGE C STRICT;
-- repmgrd PID tracking and pause-state functions, each implemented by the C
-- symbol of the same name in the extension module. All are declared STRICT,
-- so a call with a NULL argument returns NULL without invoking the C function.

-- Returns the recorded repmgrd PID as an integer.
CREATE FUNCTION get_repmgrd_pid()
RETURNS INT
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
LANGUAGE C STRICT;
-- Returns the recorded repmgrd pidfile as text.
CREATE FUNCTION get_repmgrd_pidfile()
RETURNS TEXT
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
LANGUAGE C STRICT;
-- Takes a PID (INT) and a pidfile (TEXT); returns nothing.
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
RETURNS VOID
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
LANGUAGE C STRICT;
-- Returns a boolean indicating whether repmgrd is running.
CREATE FUNCTION repmgrd_is_running()
RETURNS BOOL
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
LANGUAGE C STRICT;
-- Takes a boolean (TRUE to pause, FALSE to unpause); returns nothing.
CREATE FUNCTION repmgrd_pause(BOOL)
RETURNS VOID
AS 'MODULE_PATHNAME', 'repmgrd_pause'
LANGUAGE C STRICT;
-- Returns a boolean indicating whether repmgrd is paused.
CREATE FUNCTION repmgrd_is_paused()
RETURNS BOOL
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
LANGUAGE C STRICT;
CREATE VIEW repmgr.replication_status AS
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,

View File

@@ -26,7 +26,6 @@
#define SHOW_HEADER_COUNT 7
typedef enum
{
SHOW_ID = 0,
@@ -51,14 +50,6 @@ typedef enum
} EventHeader;
struct ColHeader
{
char title[MAXLEN];
int max_length;
int cur_length;
};
struct ColHeader headers_show[SHOW_HEADER_COUNT];
struct ColHeader headers_event[EVENT_HEADER_COUNT];
@@ -159,7 +150,7 @@ do_cluster_show(void)
else
{
item_list_append_format(&warnings,
"unable to connect to node \"%s\" (ID: %i)",
"unable to connect to node \"%s\" (ID: %i)",
cell->node_info->node_name, cell->node_info->node_id);
}
}
@@ -364,36 +355,10 @@ do_cluster_show(void)
}
/* Print column header row (text mode only) */
if (runtime_options.output_mode == OM_TEXT)
{
for (i = 0; i < SHOW_HEADER_COUNT; i++)
{
if (i == 0)
printf(" ");
else
printf(" | ");
printf("%-*s",
headers_show[i].max_length,
headers_show[i].title);
}
printf("\n");
printf("-");
for (i = 0; i < SHOW_HEADER_COUNT; i++)
{
int j;
for (j = 0; j < headers_show[i].max_length; j++)
printf("-");
if (i < (SHOW_HEADER_COUNT - 1))
printf("-+-");
else
printf("-");
}
printf("\n");
print_status_header(SHOW_HEADER_COUNT, headers_show);
}
for (cell = nodes.head; cell; cell = cell->next)

420
repmgr-action-daemon.c Normal file
View File

@@ -0,0 +1,420 @@
/*
* repmgr-action-daemon.c
*
* Implements repmgrd actions for the repmgr command line utility
* Copyright (c) 2ndQuadrant, 2010-2018
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "repmgr.h"
#include "repmgr-client-global.h"
#include "repmgr-action-daemon.h"
/*
* Possibly also show:
* - repmgrd start time?
* - repmgrd mode
* - priority
* - whether promotion candidate (due to zero priority/different location)
*/
/*
 * Column indexes for the "daemon status" table; each value is used as an
 * index into headers_status[].
 */
typedef enum
{
STATUS_ID = 0,
STATUS_NAME,
STATUS_ROLE,
STATUS_PG,
STATUS_RUNNING,
STATUS_PID,
STATUS_PAUSED
} StatusHeader;
/* Number of columns; must be kept in sync with the StatusHeader enum above */
#define STATUS_HEADER_COUNT 7
/* Title and width bookkeeping for each "daemon status" column */
struct ColHeader headers_status[STATUS_HEADER_COUNT];
/* Helpers private to this file */
static void fetch_node_records(PGconn *conn, NodeInfoList *node_list);
static void _do_repmgr_pause(bool pause);
void
do_daemon_status(void)
{
PGconn *conn = NULL;
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
NodeInfoListCell *cell = NULL;
int i;
RepmgrdInfo **repmgrd_info;
ItemList warnings = {NULL, NULL};
/* Connect to local database to obtain cluster connection data */
log_verbose(LOG_INFO, _("connecting to database"));
if (strlen(config_file_options.conninfo))
conn = establish_db_connection(config_file_options.conninfo, true);
else
conn = establish_db_connection_by_params(&source_conninfo, true);
fetch_node_records(conn, &nodes);
repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count);
if (repmgrd_info == NULL)
{
log_error(_("unable to allocate memory"));
exit(ERR_OUT_OF_MEMORY);
}
strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN);
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN);
strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN);
strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN);
strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN);
strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN);
strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN);
for (i = 0; i < STATUS_HEADER_COUNT; i++)
{
headers_status[i].max_length = strlen(headers_status[i].title);
}
i = 0;
for (cell = nodes.head; cell; cell = cell->next)
{
int j;
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
repmgrd_info[i]->node_id = cell->node_info->node_id;
repmgrd_info[i]->pid = UNKNOWN_PID;
repmgrd_info[i]->paused = false;
repmgrd_info[i]->running = false;
repmgrd_info[i]->pg_running = true;
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
if (runtime_options.verbose)
{
char error[MAXLEN];
strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN);
item_list_append_format(&warnings,
"when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"",
cell->node_info->node_name, cell->node_info->node_id, trim(error));
}
else
{
item_list_append_format(&warnings,
"unable to connect to node \"%s\" (ID: %i)",
cell->node_info->node_name, cell->node_info->node_id);
}
repmgrd_info[i]->pg_running = false;
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running"));
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a"));
maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
}
else
{
maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running"));
repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
if (repmgrd_info[i]->running == true)
{
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("running"));
}
else
{
maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("not running"));
}
if (repmgrd_info[i]->pid == UNKNOWN_PID)
{
maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a"));
}
else
{
maxlen_snprintf(repmgrd_info[i]->pid_text, "%i", repmgrd_info[i]->pid);
}
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
PQfinish(cell->node_info->conn);
}
headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name);
headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type));
headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text);
headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running);
headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text);
for (j = 0; j < STATUS_HEADER_COUNT; j++)
{
if (headers_status[j].cur_length > headers_status[j].max_length)
{
headers_status[j].max_length = headers_status[j].cur_length;
}
}
i++;
}
/* Print column header row (text mode only) */
if (runtime_options.output_mode == OM_TEXT)
{
print_status_header(STATUS_HEADER_COUNT, headers_status);
}
i = 0;
for (cell = nodes.head; cell; cell = cell->next)
{
if (runtime_options.output_mode == OM_CSV)
{
printf("%i,%s,%s,%i,%i,%i,%i\n",
cell->node_info->node_id,
cell->node_info->node_name,
get_node_type_string(cell->node_info->type),
repmgrd_info[i]->pg_running ? 1 : 0,
repmgrd_info[i]->running ? 1 : 0,
repmgrd_info[i]->pid,
repmgrd_info[i]->paused ? 1 : 0);
}
else
{
printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id);
printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name);
printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type));
printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text);
printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running);
printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text);
if (repmgrd_info[i]->pid == UNKNOWN_PID)
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a");
else
printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no");
printf("\n");
}
free(repmgrd_info[i]);
i++;
}
free(repmgrd_info);
/* emit any warnings */
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
{
ItemListCell *cell = NULL;
printf(_("\nWARNING: following issues were detected\n"));
for (cell = warnings.head; cell; cell = cell->next)
{
printf(_(" - %s\n"), cell->string);
}
if (runtime_options.verbose == false)
{
log_hint(_("execute with --verbose option to see connection error messages"));
}
}
}
/*
 * do_daemon_pause()
 *
 * Entry point for "repmgr daemon pause"; delegates to _do_repmgr_pause()
 * with pause = true.
 */
void
do_daemon_pause(void)
{
_do_repmgr_pause(true);
}
/*
 * do_daemon_unpause()
 *
 * Entry point for "repmgr daemon unpause"; delegates to _do_repmgr_pause()
 * with pause = false.
 */
void
do_daemon_unpause(void)
{
_do_repmgr_pause(false);
}
/*
 * _do_repmgr_pause()
 *
 * Common implementation of "repmgr daemon pause" and "repmgr daemon unpause".
 *
 * Retrieves all node records, connects to each node in turn and sets the
 * repmgrd "paused" flag there to the value of "pause". With --dry-run, only
 * reports what would be done for each reachable node.
 *
 * This function does not return: it exits with ERR_REPMGRD_PAUSE if any node
 * could not be reached or updated, otherwise with SUCCESS.
 */
static void
_do_repmgr_pause(bool pause)
{
	PGconn	   *conn = NULL;
	NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
	NodeInfoListCell *cell = NULL;
	int			error_nodes = 0;

	/* Connect to local database to obtain cluster connection data */
	log_verbose(LOG_INFO, _("connecting to database"));

	if (strlen(config_file_options.conninfo))
		conn = establish_db_connection(config_file_options.conninfo, true);
	else
		conn = establish_db_connection_by_params(&source_conninfo, true);

	/* exits (after closing "conn") if no node records can be retrieved */
	fetch_node_records(conn, &nodes);

	/* the local connection is only needed to fetch the node records */
	PQfinish(conn);
	conn = NULL;

	for (cell = nodes.head; cell; cell = cell->next)
	{
		log_verbose(LOG_DEBUG, "%s node %i (%s)",
					pause == true ? "pausing" : "unpausing",
					cell->node_info->node_id,
					cell->node_info->node_name);

		cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);

		if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
		{
			log_warning(_("unable to connect to node %i"),
						cell->node_info->node_id);
			error_nodes++;
		}
		else
		{
			if (runtime_options.dry_run == true)
			{
				if (pause == true)
				{
					log_info(_("would pause node %i (%s) "),
							 cell->node_info->node_id,
							 cell->node_info->node_name);
				}
				else
				{
					log_info(_("would unpause node %i (%s) "),
							 cell->node_info->node_id,
							 cell->node_info->node_name);
				}
			}
			else
			{
				bool		success = repmgrd_pause(cell->node_info->conn, pause);

				if (success == false)
					error_nodes++;

				log_notice(_("node %i (%s) %s"),
						   cell->node_info->node_id,
						   cell->node_info->node_name,
						   success == true
						   ? pause == true ? "paused" : "unpaused"
						   : pause == true ? "not paused" : "not unpaused");
			}
		}

		/*
		 * Release the connection object whether or not the connection
		 * attempt succeeded; a failed PGconn still holds memory.
		 */
		PQfinish(cell->node_info->conn);
		cell->node_info->conn = NULL;
	}

	if (error_nodes > 0)
	{
		if (pause == true)
		{
			log_error(_("unable to pause %i node(s)"), error_nodes);
		}
		else
		{
			log_error(_("unable to unpause %i node(s)"), error_nodes);
		}

		log_hint(_("execute \"repmgr daemon status\" to view current status"));

		exit(ERR_REPMGRD_PAUSE);
	}

	exit(SUCCESS);
}
/*
 * fetch_node_records()
 *
 * Populate "node_list" with all node records retrieved over "conn".
 *
 * If the records cannot be retrieved, or none exist, "conn" is closed and
 * the program exits with ERR_BAD_CONFIG; the function therefore only returns
 * with a non-empty list. On return "conn" is still open and remains the
 * caller's responsibility.
 */
static void
fetch_node_records(PGconn *conn, NodeInfoList *node_list)
{
	bool		success = get_all_node_records(conn, node_list);

	if (success == false)
	{
		/* get_all_node_records() will display any error message */
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	if (node_list->node_count == 0)
	{
		log_error(_("no node records were found"));
		log_hint(_("ensure at least one node is registered"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}
}
/*
 * do_daemon_help()
 *
 * Print usage information for the "repmgr daemon" subcommands
 * (status, pause and unpause) to stdout.
 */
void
do_daemon_help(void)
{
	print_help_header();

	printf(_("Usage:\n"));
	printf(_(" %s [OPTIONS] daemon status\n"), progname());
	printf(_(" %s [OPTIONS] daemon pause\n"), progname());
	printf(_(" %s [OPTIONS] daemon unpause\n"), progname());
	puts("");

	printf(_("DAEMON STATUS\n"));
	puts("");
	printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n"));
	puts("");
	printf(_(" --csv emit output as CSV\n"));
	printf(_(" --verbose show text of database connection error messages\n"));
	puts("");

	printf(_("DAEMON PAUSE\n"));
	puts("");
	printf(_(" \"daemon pause\" instructs repmgrd on each node to pause failover detection\n"));
	puts("");
	printf(_(" --dry-run check if nodes are reachable but don't pause repmgrd\n"));
	puts("");

	/* this section describes "daemon unpause", so label it accordingly */
	printf(_("DAEMON UNPAUSE\n"));
	puts("");
	printf(_(" \"daemon unpause\" instructs repmgrd on each node to resume failover detection\n"));
	puts("");
	printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n"));
	puts("");

	puts("");
}

28
repmgr-action-daemon.h Normal file
View File

@@ -0,0 +1,28 @@
/*
* repmgr-action-daemon.h
* Copyright (c) 2ndQuadrant, 2010-2018
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _REPMGR_ACTION_DAEMON_H_
#define _REPMGR_ACTION_DAEMON_H_
extern void do_daemon_status(void);
extern void do_daemon_pause(void);
extern void do_daemon_unpause(void);
extern void do_daemon_help(void);
#endif

View File

@@ -2788,15 +2788,13 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
/*
* Perform a switchover by:
*
* - stopping current primary node
* - promoting this standby node to primary
* - forcing previous primary node to follow this node
* - forcing the previous primary node to follow this node
*
* Caveat:
* - repmgrd must not be running, otherwise it may
* attempt a failover
* (TODO: find some way of notifying repmgrd of planned
* activity like this)
* Where running and not already paused, repmgrd will be paused (and
* subsequently unpaused), unless --repmgrd-no-pause provided.
*
* TODO:
* - make connection test timeouts/intervals configurable (see below)
@@ -2854,6 +2852,11 @@ do_standby_switchover(void)
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
/* used for handling repmgrd pause/unpause */
NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER;
RepmgrdInfo **repmgrd_info = NULL;
int repmgrd_running_count = 0;
/*
* SANITY CHECKS
*
@@ -2924,7 +2927,7 @@ do_standby_switchover(void)
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for node %i"),
log_error(_("unable to retrieve node record for currentr primary (node %i)"),
remote_node_id);
PQfinish(local_conn);
@@ -2980,6 +2983,7 @@ do_standby_switchover(void)
{
min_required_free_slots++;
}
/*
* If --force-rewind specified, check pg_rewind can be used, and
* pre-emptively fetch the list of configuration files which should be
@@ -3544,8 +3548,8 @@ do_standby_switchover(void)
log_debug("minimum of %i free slots (%i for siblings) required; %i available",
min_required_free_slots,
reachable_sibling_nodes_with_slot_count
, available_slots);
reachable_sibling_nodes_with_slot_count,
available_slots);
if (available_slots < min_required_free_slots)
{
@@ -3575,6 +3579,147 @@ do_standby_switchover(void)
}
}
/*
* Attempt to pause all repmgrd instances, unless user explicitly
* specifies not to.
*/
if (runtime_options.repmgrd_no_pause == false)
{
NodeInfoListCell *cell = NULL;
ItemList repmgrd_connection_errors = {NULL, NULL};
int i = 0;
int unreachable_node_count = 0;
get_all_node_records(local_conn, &all_nodes);
repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * all_nodes.node_count);
for (cell = all_nodes.head; cell; cell = cell->next)
{
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo));
repmgrd_info[i]->node_id = cell->node_info->node_id;
repmgrd_info[i]->pid = UNKNOWN_PID;
repmgrd_info[i]->paused = false;
repmgrd_info[i]->running = false;
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
{
/*
* unable to connect; treat this as an error
*/
repmgrd_info[i]->pg_running = false;
item_list_append_format(&repmgrd_connection_errors,
_("unable to connect to node \"%s\" (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
unreachable_node_count++;
continue;
}
repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn);
repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn);
repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn);
if (repmgrd_info[i]->running == true)
repmgrd_running_count++;
i++;
}
if (unreachable_node_count > 0)
{
PQExpBufferData msg;
PQExpBufferData detail;
ItemListCell *cell;
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("unable to connect to %i node(s), unable to pause all repmgrd instances"),
unreachable_node_count);
initPQExpBuffer(&detail);
for (cell = repmgrd_connection_errors.head; cell; cell = cell->next)
{
appendPQExpBuffer(&detail,
" %s\n",
cell->string);
}
if (runtime_options.force == false)
{
log_error("%s", msg.data);
}
else
{
log_warning("%s", msg.data);
}
log_detail(_("following node(s) unreachable:\n%s"), detail.data);
termPQExpBuffer(&msg);
termPQExpBuffer(&detail);
/* tell user about footgun */
if (runtime_options.force == false)
{
log_hint(_("use -F/--force to continue anyway"));
clear_node_info_list(&sibling_nodes);
clear_node_info_list(&all_nodes);
exit(ERR_SWITCHOVER_FAIL);
}
}
if (repmgrd_running_count > 0)
{
i = 0;
for (cell = all_nodes.head; cell; cell = cell->next)
{
/*
* Skip if node is already paused. Note we won't unpause these, to
* leave the repmgrd instances in the cluster in the same state they
* were before the switchover.
*/
if (repmgrd_info[i]->paused == true)
{
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
i++;
continue;
}
if (runtime_options.dry_run == true)
{
log_info(_("would pause repmgrd on node %s (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
}
else
{
/* XXX check result */
log_debug("pausing repmgrd on node %s (ID %i)",
cell->node_info->node_name,
cell->node_info->node_id);
(void) repmgrd_pause(cell->node_info->conn, true);
}
PQfinish(cell->node_info->conn);
cell->node_info->conn = NULL;
i++;
}
}
}
/*
* Sanity checks completed - prepare for the switchover
@@ -3656,6 +3801,7 @@ do_standby_switchover(void)
shutdown_command);
clear_node_info_list(&sibling_nodes);
clear_node_info_list(&all_nodes);
key_value_list_free(&remote_config_files);
return;
@@ -3793,7 +3939,7 @@ do_standby_switchover(void)
/*
* if pg_rewind is requested, issue a checkpoint immediately after promoting
* If pg_rewind is requested, issue a checkpoint immediately after promoting
* the local node, as pg_rewind compares timelines on the basis of the value
* in pg_control, which is written at the first checkpoint, which might not
* occur immediately.
@@ -3805,7 +3951,7 @@ do_standby_switchover(void)
}
/*
* Execute `repmgr node rejoin` to create recovery.conf and start the
* Execute "repmgr node rejoin" to create recovery.conf and start the
* remote server. Additionally execute "pg_rewind", if required and
* requested.
*/
@@ -3819,6 +3965,7 @@ do_standby_switchover(void)
{
log_error(_("new primary diverges from former primary and --force-rewind not provided"));
log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\""));
termPQExpBuffer(&node_rejoin_options);
PQfinish(local_conn);
exit(ERR_SWITCHOVER_FAIL);
@@ -3875,7 +4022,7 @@ do_standby_switchover(void)
if (command_success == false)
{
log_error(_("rejoin failed %i"), r);
log_error(_("rejoin failed with error code %i"), r);
create_event_notification_extended(local_conn,
&config_file_options,
@@ -3997,11 +4144,13 @@ do_standby_switchover(void)
clear_node_info_list(&sibling_nodes);
PQfinish(local_conn);
/*
* Clean up remote node. It's possible that the standby is still starting up,
* so poll for a while until we get a connection.
* Clean up remote node (primary demoted to standby). It's possible that the node is
* still starting up, so poll for a while until we get a connection.
*/
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
@@ -4053,6 +4202,84 @@ do_standby_switchover(void)
PQfinish(remote_conn);
/*
* Attempt to unpause all paused repmgrd instances, unless user explicitly
* specifies not to.
*/
if (runtime_options.repmgrd_no_pause == false)
{
if (repmgrd_running_count > 0)
{
ItemList repmgrd_unpause_errors = {NULL, NULL};
NodeInfoListCell *cell = NULL;
int i = 0;
int error_node_count = 0;
for (cell = all_nodes.head; cell; cell = cell->next)
{
if (repmgrd_info[i]->paused == true)
{
log_debug("repmgrd on node %s (ID %i) paused before switchover, not unpausing",
cell->node_info->node_name,
cell->node_info->node_id);
i++;
continue;
}
log_debug("unpausing repmgrd on node %s (ID %i)",
cell->node_info->node_name,
cell->node_info->node_id);
cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo);
if (PQstatus(cell->node_info->conn) == CONNECTION_OK)
{
if (repmgrd_pause(cell->node_info->conn, false) == false)
{
item_list_append_format(&repmgrd_unpause_errors,
_("unable to unpause node \"%s\" (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
error_node_count++;
}
}
else
{
item_list_append_format(&repmgrd_unpause_errors,
_("unable to connect to node \"%s\" (ID %i)"),
cell->node_info->node_name,
cell->node_info->node_id);
error_node_count++;
}
i++;
}
if (error_node_count > 0)
{
PQExpBufferData detail;
ItemListCell *cell;
for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next)
{
appendPQExpBuffer(&detail,
" %s\n",
cell->string);
}
log_warning(_("unable to unpause repmgrd on %i node(s)"),
error_node_count);
log_detail(_("errors encountered for following node(s):\n%s"), detail.data);
log_hint(_("check node connection and status; unpause manually with \"repmgr daemon unpause\""));
termPQExpBuffer(&detail);
}
}
clear_node_info_list(&all_nodes);
}
if (switchover_success == true)
{
@@ -6602,6 +6829,7 @@ do_standby_help(void)
printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n"));
printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username);
printf(_(" --repmgrd-no-pause don't pause repmgrd\n"));
printf(_(" --siblings-follow have other standbys follow new primary\n"));
puts("");

View File

@@ -97,6 +97,7 @@ typedef struct
bool force_rewind_used;
char force_rewind_path[MAXPGPATH];
bool siblings_follow;
bool repmgrd_no_pause;
/* "node status" options */
bool is_shutdown_cleanly;
@@ -156,7 +157,7 @@ typedef struct
/* "standby register" options */ \
false, -1, DEFAULT_WAIT_START, \
/* "standby switchover" options */ \
false, false, "", false, \
false, false, "", false, false, \
/* "node status" options */ \
false, \
/* "node check" options */ \
@@ -193,6 +194,14 @@ typedef enum
} t_server_action;
typedef struct ColHeader
{
char title[MAXLEN];
int max_length;
int cur_length;
} ColHeader;
/* global configuration structures */
extern t_runtime_options runtime_options;
@@ -228,7 +237,10 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc
extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf);
extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record);
/* display functions */
extern void print_help_header(void);
extern void print_status_header(int cols, ColHeader *headers);
/* server control functions */
extern void get_server_action(t_server_action action, char *script, char *data_dir);

View File

@@ -30,10 +30,15 @@
* NODE STATUS
* NODE CHECK
*
* DAEMON STATUS
* DAEMON PAUSE
* DAEMON UNPAUSE
*
* For internal use:
* NODE REJOIN
* NODE SERVICE
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
@@ -62,6 +67,7 @@
#include "repmgr-action-bdr.h"
#include "repmgr-action-node.h"
#include "repmgr-action-cluster.h"
#include "repmgr-action-daemon.h"
#include <storage/fd.h> /* for PG_TEMP_FILE_PREFIX */
@@ -438,6 +444,10 @@ main(int argc, char **argv)
runtime_options.siblings_follow = true;
break;
case OPT_REPMGRD_NO_PAUSE:
runtime_options.repmgrd_no_pause = true;
break;
/*----------------------
* "node status" options
*----------------------
@@ -900,6 +910,21 @@ main(int argc, char **argv)
else if (strcasecmp(repmgr_action, "CLEANUP") == 0)
action = CLUSTER_CLEANUP;
}
else if (strcasecmp(repmgr_command, "DAEMON") == 0)
{
if (help_option == true)
{
do_daemon_help();
exit(SUCCESS);
}
if (strcasecmp(repmgr_action, "STATUS") == 0)
action = DAEMON_STATUS;
else if (strcasecmp(repmgr_action, "PAUSE") == 0)
action = DAEMON_PAUSE;
else if (strcasecmp(repmgr_action, "UNPAUSE") == 0)
action = DAEMON_UNPAUSE;
}
else
{
valid_repmgr_command_found = false;
@@ -1298,6 +1323,17 @@ main(int argc, char **argv)
do_cluster_cleanup();
break;
/* DAEMON */
case DAEMON_STATUS:
do_daemon_status();
break;
case DAEMON_PAUSE:
do_daemon_pause();
break;
case DAEMON_UNPAUSE:
do_daemon_unpause();
break;
default:
/* An action will have been determined by this point */
break;
@@ -1744,6 +1780,18 @@ check_cli_parameters(const int action)
}
}
if (runtime_options.repmgrd_no_pause == true)
{
switch (action)
{
case STANDBY_SWITCHOVER:
break;
default:
item_list_append_format(&cli_warnings,
_("--repmgrd-no-pause will be ignored when executing %s"),
action_name(action));
}
}
if (runtime_options.config_files[0] != '\0')
{
@@ -1772,6 +1820,8 @@ check_cli_parameters(const int action)
case WITNESS_UNREGISTER:
case NODE_REJOIN:
case NODE_SERVICE:
case DAEMON_PAUSE:
case DAEMON_UNPAUSE:
break;
default:
item_list_append_format(&cli_warnings,
@@ -1851,6 +1901,14 @@ action_name(const int action)
return "CLUSTER MATRIX";
case CLUSTER_CROSSCHECK:
return "CLUSTER CROSSCHECK";
case DAEMON_STATUS:
return "DAEMON STATUS";
case DAEMON_PAUSE:
return "DAEMON PAUSE";
case DAEMON_UNPAUSE:
return "DAEMON UNPAUSE";
}
return "UNKNOWN ACTION";
@@ -1878,6 +1936,42 @@ print_error_list(ItemList *error_list, int log_level)
}
/*
 * print_status_header()
 *
 * Write a two-line table header to stdout: a row containing each column's
 * title, left-aligned and padded to that column's "max_length", followed by
 * a dashed separator row with "+" at the column boundaries.
 *
 * "headers" must contain at least "cols" entries.
 */
void
print_status_header(int cols, ColHeader *headers)
{
	int			col;

	/* title row: a single leading space, then " | " between columns */
	for (col = 0; col < cols; col++)
	{
		const char *lead = (col == 0) ? " " : " | ";

		printf("%s%-*s", lead, headers[col].max_length, headers[col].title);
	}
	printf("\n");

	/* separator row: dashes matching the padded width of each column */
	printf("-");
	for (col = 0; col < cols; col++)
	{
		int			remaining = headers[col].max_length;

		while (remaining-- > 0)
			printf("-");

		printf("%s", (col < cols - 1) ? "-+-" : "-");
	}
	printf("\n");
}
void
print_help_header(void)
{
@@ -3021,4 +3115,3 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
}
}
}

View File

@@ -45,6 +45,9 @@
#define CLUSTER_MATRIX 19
#define CLUSTER_CROSSCHECK 20
#define CLUSTER_EVENT 21
#define DAEMON_STATUS 22
#define DAEMON_PAUSE 23
#define DAEMON_UNPAUSE 24
/* command line options without short versions */
#define OPT_HELP 1001
@@ -88,6 +91,7 @@
#define OPT_RECOVERY_CONF_ONLY 1039
#define OPT_NO_WAIT 1040
#define OPT_MISSING_SLOTS 1041
#define OPT_REPMGRD_NO_PAUSE 1042
/* deprecated since 3.3 */
#define OPT_DATA_DIR 999
@@ -156,6 +160,7 @@ static struct option long_options[] =
*/
{"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE},
{"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW},
{"repmgrd-no-pause", no_argument, NULL, OPT_REPMGRD_NO_PAUSE},
/* "node status" options */
{"is-shutdown-cleanly", no_argument, NULL, OPT_IS_SHUTDOWN_CLEANLY},

260
repmgr.c
View File

@@ -26,6 +26,7 @@
#include "access/xlog.h"
#include "miscadmin.h"
#include "replication/walreceiver.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/procarray.h"
@@ -43,14 +44,21 @@
#include "lib/stringinfo.h"
#include "access/xact.h"
#include "utils/snapmgr.h"
#include "pgstat.h"
#if (PG_VERSION_NUM >= 90400)
#include "pgstat.h"
#else
#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat"
#endif
#include "voting.h"
#define UNKNOWN_NODE_ID -1
#define UNKNOWN_PID -1
#define TRANCHE_NAME "repmgrd"
#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt"
PG_MODULE_MAGIC;
@@ -66,6 +74,9 @@ typedef struct repmgrdSharedState
LWLockId lock; /* protects search/modification */
TimestampTz last_updated;
int local_node_id;
int repmgrd_pid;
char repmgrd_pidfile[MAXPGPATH];
bool repmgrd_paused;
/* streaming failover */
NodeVotingStatus voting_status;
int current_electoral_term;
@@ -112,6 +123,25 @@ PG_FUNCTION_INFO_V1(am_bdr_failover_handler);
Datum unset_bdr_failover_handler(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
Datum set_repmgrd_pid(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(set_repmgrd_pid);
Datum get_repmgrd_pid(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(get_repmgrd_pid);
Datum get_repmgrd_pidfile(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(get_repmgrd_pidfile);
Datum repmgrd_is_running(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgrd_is_running);
Datum repmgrd_pause(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgrd_pause);
Datum repmgrd_is_paused(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(repmgrd_is_paused);
/*
* Module load callback
@@ -185,6 +215,9 @@ repmgr_shmem_startup(void)
#endif
shared_state->local_node_id = UNKNOWN_NODE_ID;
shared_state->repmgrd_pid = UNKNOWN_PID;
memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);
shared_state->repmgrd_paused = false;
shared_state->current_electoral_term = 0;
shared_state->voting_status = VS_NO_VOTE;
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
@@ -204,6 +237,8 @@ Datum
set_local_node_id(PG_FUNCTION_ARGS)
{
int local_node_id = UNKNOWN_NODE_ID;
int stored_node_id = UNKNOWN_NODE_ID;
int paused = -1;
if (!shared_state)
PG_RETURN_NULL();
@@ -213,6 +248,34 @@ set_local_node_id(PG_FUNCTION_ARGS)
local_node_id = PG_GETARG_INT32(0);
/* read state file and if exists/valid, update "repmgrd_paused" */
{
FILE *file = NULL;
file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_R);
if (file != NULL)
{
int buffer_size = 128;
char buffer[buffer_size];
if (fgets(buffer, buffer_size, file) != NULL)
{
if (sscanf(buffer, "%i:%i", &stored_node_id, &paused) != 2)
{
elog(WARNING, "unable to parse repmgrd state file");
}
else
{
elog(DEBUG1, "node_id: %i; paused: %i", stored_node_id, paused);
}
}
FreeFile(file);
}
}
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
/* only set local_node_id once, as it should never change */
@@ -221,6 +284,19 @@ set_local_node_id(PG_FUNCTION_ARGS)
shared_state->local_node_id = local_node_id;
}
/* only update if state file valid */
if (stored_node_id == shared_state->local_node_id)
{
if (paused == 0)
{
shared_state->repmgrd_paused = false;
}
else if (paused == 1)
{
shared_state->repmgrd_paused = true;
}
}
LWLockRelease(shared_state->lock);
PG_RETURN_VOID();
@@ -422,3 +498,185 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
/*
* Returns the repmgrd pid; or NULL if none set; or -1 if set but repmgrd
* process not running (TODO!)
*/
Datum
get_repmgrd_pid(PG_FUNCTION_ARGS)
{
int repmgrd_pid = UNKNOWN_PID;
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
repmgrd_pid = shared_state->repmgrd_pid;
LWLockRelease(shared_state->lock);
PG_RETURN_INT32(repmgrd_pid);
}
/*
 * get_repmgrd_pidfile()
 *
 * SQL-callable function returning the repmgrd pidfile path recorded in
 * shared memory, or NULL if the shared state is not available or no pidfile
 * has been recorded.
 */
Datum
get_repmgrd_pidfile(PG_FUNCTION_ARGS)
{
	char		repmgrd_pidfile[MAXPGPATH];

	if (!shared_state)
		PG_RETURN_NULL();

	memset(repmgrd_pidfile, 0, MAXPGPATH);

	LWLockAcquire(shared_state->lock, LW_SHARED);

	/*
	 * Copy at most MAXPGPATH - 1 bytes so the final byte, zeroed above,
	 * always terminates the local copy even if the stored value fills the
	 * entire shared buffer.
	 */
	strncpy(repmgrd_pidfile, shared_state->repmgrd_pidfile, MAXPGPATH - 1);

	LWLockRelease(shared_state->lock);

	/* an empty string means no pidfile was recorded */
	if (repmgrd_pidfile[0] == '\0')
		PG_RETURN_NULL();

	PG_RETURN_TEXT_P(cstring_to_text(repmgrd_pidfile));
}
/*
 * set_repmgrd_pid()
 *
 * SQL-callable function which records the repmgrd PID (argument 0) and
 * pidfile path (argument 1) in shared memory.
 *
 * A NULL PID is stored as UNKNOWN_PID. The pidfile is only stored when a
 * PID was provided and the pidfile argument is not NULL; otherwise any
 * previously stored pidfile is cleared.
 *
 * Does nothing if the shared state is not available.
 */
Datum
set_repmgrd_pid(PG_FUNCTION_ARGS)
{
	int			repmgrd_pid = UNKNOWN_PID;
	char	   *repmgrd_pidfile = NULL;

	if (!shared_state)
		PG_RETURN_VOID();

	if (!PG_ARGISNULL(0))
	{
		repmgrd_pid = PG_GETARG_INT32(0);
	}

	elog(DEBUG3, "set_repmgrd_pid(): provided pid is %i", repmgrd_pid);

	if (repmgrd_pid != UNKNOWN_PID && !PG_ARGISNULL(1))
	{
		repmgrd_pidfile = text_to_cstring(PG_GETARG_TEXT_PP(1));

		/*
		 * Log at the same level as the PID message above; INFO would be
		 * sent to the calling client on every invocation.
		 */
		elog(DEBUG3, "set_repmgrd_pid(): provided pidfile is %s", repmgrd_pidfile);
	}

	LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);

	shared_state->repmgrd_pid = repmgrd_pid;

	/* always clear the old value, so no stale path is left behind */
	memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH);

	if (repmgrd_pidfile != NULL)
	{
		/*
		 * Copy at most MAXPGPATH - 1 bytes so the final byte, zeroed above,
		 * always terminates the stored path (an over-long path is
		 * truncated rather than stored unterminated).
		 */
		strncpy(shared_state->repmgrd_pidfile, repmgrd_pidfile, MAXPGPATH - 1);
	}

	LWLockRelease(shared_state->lock);

	PG_RETURN_VOID();
}
Datum
repmgrd_is_running(PG_FUNCTION_ARGS)
{
int repmgrd_pid = UNKNOWN_PID;
int kill_ret;
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
repmgrd_pid = shared_state->repmgrd_pid;
LWLockRelease(shared_state->lock);
/* No PID registered - assume not running */
if (repmgrd_pid == UNKNOWN_PID)
{
PG_RETURN_BOOL(false);
}
kill_ret = kill(repmgrd_pid, 0);
if (kill_ret == 0)
{
PG_RETURN_BOOL(true);
}
PG_RETURN_BOOL(false);
}
Datum
repmgrd_pause(PG_FUNCTION_ARGS)
{
bool pause;
FILE *file = NULL;
StringInfoData buf;
if (!shared_state)
PG_RETURN_NULL();
if (PG_ARGISNULL(0))
PG_RETURN_NULL();
pause = PG_GETARG_BOOL(0);
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
shared_state->repmgrd_paused = pause;
LWLockRelease(shared_state->lock);
/* write state to file */
file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_W);
if (file == NULL)
{
elog(DEBUG1, "unable to allocate %s", REPMGRD_STATE_FILE);
// XXX anything else we can do? log?
PG_RETURN_VOID();
}
elog(DEBUG1, "allocated");
initStringInfo(&buf);
LWLockAcquire(shared_state->lock, LW_SHARED);
appendStringInfo(&buf, "%i:%i",
shared_state->local_node_id,
pause ? 1 : 0);
LWLockRelease(shared_state->lock);
// XXX check success
fwrite(buf.data, strlen(buf.data) + 1, 1, file);
resetStringInfo(&buf);
FreeFile(file);
PG_RETURN_VOID();
}
Datum
repmgrd_is_paused(PG_FUNCTION_ARGS)
{
bool is_paused;
if (!shared_state)
PG_RETURN_NULL();
LWLockAcquire(shared_state->lock, LW_SHARED);
is_paused = shared_state->repmgrd_paused;
LWLockRelease(shared_state->lock);
PG_RETURN_BOOL(is_paused);
}

View File

@@ -53,6 +53,7 @@
#define UNKNOWN_TIMELINE_ID -1
#define UNKNOWN_SYSTEM_IDENTIFIER 0
#define UNKNOWN_PID -1
#define NODE_NOT_FOUND -1
#define NO_UPSTREAM_NODE -1

View File

@@ -106,12 +106,13 @@ handle_sigint_physical(SIGNAL_ARGS)
else
writeable_conn = primary_conn;
create_event_notification(writeable_conn,
&config_file_options,
config_file_options.node_id,
"repmgrd_shutdown",
true,
event_details.data);
if (PQstatus(writeable_conn) == CONNECTION_OK)
create_event_notification(writeable_conn,
&config_file_options,
config_file_options.node_id,
"repmgrd_shutdown",
true,
event_details.data);
termPQExpBuffer(&event_details);
@@ -145,7 +146,6 @@ do_physical_node_check(void)
case FAILOVER_AUTOMATIC:
log_error(_("this node is marked as inactive and cannot be used as a failover target"));
log_hint(_("%s"), hint);
close_connection(&local_conn);
create_event_notification(NULL,
&config_file_options,
@@ -206,8 +206,7 @@ do_physical_node_check(void)
if (required_param_missing == true)
{
log_hint(_("add the missing configuration parameter(s) and start repmgrd again"));
close_connection(&local_conn);
exit(ERR_BAD_CONFIG);
terminate(ERR_BAD_CONFIG);
}
}
}
@@ -339,6 +338,7 @@ monitor_streaming_primary(void)
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
goto loop;
@@ -606,8 +606,7 @@ monitor_streaming_standby(void)
if (local_node_info.upstream_node_id == NODE_NOT_FOUND)
{
log_error(_("unable to determine an active primary for this cluster, terminating"));
close_connection(&local_conn);
exit(ERR_BAD_CONFIG);
terminate(ERR_BAD_CONFIG);
}
}
@@ -623,15 +622,15 @@ monitor_streaming_standby(void)
log_error(_("no record found for upstream node (ID: %i), terminating"),
local_node_info.upstream_node_id);
log_hint(_("ensure the upstream node is registered correctly"));
close_connection(&local_conn);
exit(ERR_DB_CONN);
terminate(ERR_DB_CONN);
}
else if (record_status == RECORD_ERROR)
{
log_error(_("unable to retrieve record for upstream node (ID: %i), terminating"),
local_node_info.upstream_node_id);
close_connection(&local_conn);
exit(ERR_DB_CONN);
terminate(ERR_DB_CONN);
}
log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, upstream_node_info.conninfo);
@@ -650,8 +649,7 @@ monitor_streaming_standby(void)
local_node_info.upstream_node_id);
log_hint(_("upstream node must be running before repmgrd can start"));
close_connection(&local_conn);
exit(ERR_DB_CONN);
terminate(ERR_DB_CONN);
}
/*
@@ -673,7 +671,8 @@ monitor_streaming_standby(void)
{
log_error(_("unable to connect to primary node"));
log_hint(_("ensure the primary node is reachable from this node"));
exit(ERR_DB_CONN);
terminate(ERR_DB_CONN);
}
log_verbose(LOG_DEBUG, "connected to primary");
@@ -799,28 +798,40 @@ monitor_streaming_standby(void)
goto loop;
}
/* still down after reconnect attempt(s) */
/* upstream is still down after reconnect attempt(s) */
if (upstream_node_info.node_status == NODE_STATUS_DOWN)
{
bool failover_done = false;
if (upstream_node_info.type == PRIMARY)
if (PQstatus(local_conn) == CONNECTION_OK && repmgrd_is_paused(local_conn))
{
failover_done = do_primary_failover();
log_notice(_("repmgrd on this node is paused"));
log_detail(_("no failover will be carried out"));
log_hint(_("execute \"repmgr daemon unpause\" to resume normal failover mode"));
monitoring_state = MS_DEGRADED;
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
}
else if (upstream_node_info.type == STANDBY)
else
{
failover_done = do_upstream_standby_failover();
}
if (upstream_node_info.type == PRIMARY)
{
failover_done = do_primary_failover();
}
else if (upstream_node_info.type == STANDBY)
{
failover_done = do_upstream_standby_failover();
}
/*
* XXX it's possible it will make sense to return in all
* cases to restart monitoring
*/
if (failover_done == true)
{
primary_node_id = get_primary_node_id(local_conn);
return;
/*
* XXX it's possible it will make sense to return in all
* cases to restart monitoring
*/
if (failover_done == true)
{
primary_node_id = get_primary_node_id(local_conn);
return;
}
}
}
}
@@ -990,7 +1001,7 @@ monitor_streaming_standby(void)
}
if (config_file_options.failover == FAILOVER_AUTOMATIC)
if (config_file_options.failover == FAILOVER_AUTOMATIC && repmgrd_is_paused(local_conn) == false)
{
get_active_sibling_node_records(local_conn,
local_node_info.node_id,
@@ -1066,7 +1077,15 @@ loop:
termPQExpBuffer(&monitoring_summary);
if (monitoring_state == MS_DEGRADED && config_file_options.failover == FAILOVER_AUTOMATIC)
{
log_detail(_("waiting for upstream or another primary to reappear"));
if (PQstatus(local_conn) == CONNECTION_OK && repmgrd_is_paused(local_conn))
{
log_detail(_("repmgrd paused by administrator"));
log_hint(_("execute \"repmgr daemon unpause\" to resume normal failover mode"));
}
else
{
log_detail(_("waiting for upstream or another primary to reappear"));
}
}
else if (config_file_options.monitoring_history == true)
{
@@ -1195,6 +1214,7 @@ loop:
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
}
}
@@ -1247,8 +1267,7 @@ monitor_streaming_witness(void)
upstream_node_info.node_id);
log_hint(_("primary node must be running before repmgrd can start"));
close_connection(&local_conn);
exit(ERR_DB_CONN);
terminate(ERR_DB_CONN);
}
/* synchronise local copy of "repmgr.nodes", in case it was stale */
@@ -1561,6 +1580,7 @@ loop:
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
}
}
@@ -2094,6 +2114,7 @@ do_upstream_standby_failover(void)
/* refresh shared memory settings which will have been zapped by the restart */
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
/*
*
@@ -2564,6 +2585,7 @@ follow_new_primary(int new_primary_id)
/* refresh shared memory settings which will have been zapped by the restart */
repmgrd_set_local_node_id(local_conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
initPQExpBuffer(&event_details);
appendPQExpBuffer(&event_details,
@@ -3088,6 +3110,7 @@ check_connection(t_node_info *node_info, PGconn **conn)
if (stored_local_node_id == UNKNOWN_NODE_ID)
{
repmgrd_set_local_node_id(*conn, config_file_options.node_id);
repmgrd_set_pid(local_conn, getpid(), pid_file);
}
}

View File

@@ -35,7 +35,7 @@
static char *config_file = NULL;
static bool verbose = false;
static char pid_file[MAXPGPATH];
char pid_file[MAXPGPATH];
static bool daemonize = true;
static bool show_pid_file = false;
static bool no_pid_file = false;
@@ -488,6 +488,9 @@ main(int argc, char **argv)
check_and_create_pid_file(pid_file);
}
repmgrd_set_pid(local_conn, getpid(), pid_file);
#ifndef WIN32
setup_event_handlers();
#endif
@@ -901,6 +904,9 @@ print_monitoring_state(MonitoringState monitoring_state)
void
terminate(int retval)
{
if (PQstatus(local_conn) == CONNECTION_OK)
repmgrd_set_pid(local_conn, UNKNOWN_PID, NULL);
logger_shutdown();
if (pid_file[0] != '\0')

View File

@@ -20,6 +20,7 @@ extern t_configuration_options config_file_options;
extern t_node_info local_node_info;
extern PGconn *local_conn;
extern bool startup_event_logged;
extern char pid_file[MAXPGPATH];
void try_reconnect(PGconn **conn, t_node_info *node_info);