mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
13 Commits
dev/v5.5.0
...
v5.3.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
70c79aeaec | ||
|
|
afd876377c | ||
|
|
277910cb31 | ||
|
|
9554154677 | ||
|
|
2bbfb5daa0 | ||
|
|
82b0e85a66 | ||
|
|
3320eb0983 | ||
|
|
7862941300 | ||
|
|
f152fc3016 | ||
|
|
9c260e605d | ||
|
|
7ad06530e4 | ||
|
|
c787273f91 | ||
|
|
d40055c8dd |
4
HISTORY
4
HISTORY
@@ -1,11 +1,9 @@
|
||||
5.3.0 2021-??-??
|
||||
5.3.0 2021-10-12
|
||||
standby switchover: improve handling of node rejoin failure (Ian)
|
||||
repmgrd: prefix all shared library functions with "repmgr_" to
|
||||
minimize the risk of clashes with other shared libraries (Ian)
|
||||
repmgrd: at startup, if node record is marked as "inactive", attempt
|
||||
to set it to "active" (Ian)
|
||||
|
||||
5.2.2. 2021-??-??
|
||||
standby clone: set "slot_name" in node record if required (Ian)
|
||||
node rejoin: emit rejoin target note information as NOTICE (Ian)
|
||||
repmgrd: ensure short option "-s" is accepted (Ian)
|
||||
|
||||
@@ -13,6 +13,7 @@ DATA = \
|
||||
repmgr--unpackaged--4.0.sql \
|
||||
repmgr--unpackaged--5.1.sql \
|
||||
repmgr--unpackaged--5.2.sql \
|
||||
repmgr--unpackaged--5.3.sql \
|
||||
repmgr--4.0.sql \
|
||||
repmgr--4.0--4.1.sql \
|
||||
repmgr--4.1.sql \
|
||||
|
||||
7
bdr_api_version.h
Normal file
7
bdr_api_version.h
Normal file
@@ -0,0 +1,7 @@
|
||||
/*
|
||||
* bdr_version_api.h
|
||||
*
|
||||
* This is a dummy file to facilitated building Debian packages against
|
||||
* PostgreSQL Extended. It may be removed in the future. Do not add any
|
||||
* code to this file.
|
||||
*/
|
||||
39
dbutils.c
39
dbutils.c
@@ -4242,7 +4242,7 @@ _create_event(PGconn *conn, t_configuration_options *options, int node_id, char
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
/* %p: primary id ("standby_switchover": former primary id) */
|
||||
/* %p: primary id ("standby_switchover"/"repmgrd_failover_promote": former primary id) */
|
||||
src_ptr++;
|
||||
if (event_info->node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
@@ -6008,6 +6008,43 @@ is_wal_replay_paused(PGconn *conn, bool check_pending_wal)
|
||||
return is_paused;
|
||||
}
|
||||
|
||||
/* repmgrd status functions */
|
||||
|
||||
CheckStatus
|
||||
get_repmgrd_status(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res = NULL;
|
||||
CheckStatus repmgrd_status = CHECK_STATUS_CRITICAL;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
|
||||
appendPQExpBufferStr(&query,
|
||||
" SELECT "
|
||||
" CASE "
|
||||
" WHEN repmgr.repmgrd_is_running() "
|
||||
" THEN "
|
||||
" CASE "
|
||||
" WHEN repmgr.repmgrd_is_paused() THEN 1 ELSE 0 "
|
||||
" END "
|
||||
" ELSE 2 "
|
||||
" END AS repmgrd_status");
|
||||
res = PQexec(conn, query.data);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_db_error(conn, query.data, _("unable to execute repmgrd status query"));
|
||||
}
|
||||
else
|
||||
{
|
||||
repmgrd_status = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
PQclear(res);
|
||||
return repmgrd_status;
|
||||
}
|
||||
|
||||
|
||||
/* miscellaneous debugging functions */
|
||||
|
||||
|
||||
@@ -602,6 +602,9 @@ int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||
|
||||
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||
|
||||
/* repmgrd status functions */
|
||||
CheckStatus get_repmgrd_status(PGconn *conn);
|
||||
|
||||
/* miscellaneous debugging functions */
|
||||
const char *print_node_status(NodeStatus node_status);
|
||||
const char *print_pqping_status(PGPing ping_status);
|
||||
|
||||
@@ -18,13 +18,13 @@
|
||||
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
||||
<sect1 id="release-5.3.0">
|
||||
<title id="release-current">Release 5.3.0</title>
|
||||
<para><emphasis>??? ? ???, 2021</emphasis></para>
|
||||
<para><emphasis>Tue 12 October, 2021</emphasis></para>
|
||||
<para>
|
||||
&repmgr; 5.3.0 is a major release.
|
||||
</para>
|
||||
<para>
|
||||
This release provides support for <ulink url="https://www.postgresql.org/docs/14/release-14.html">PostgreSQL 14</ulink>,
|
||||
to be released later in 2021.
|
||||
released in September 2021.
|
||||
</para>
|
||||
<sect2>
|
||||
<title>Improvements</title>
|
||||
@@ -44,6 +44,11 @@
|
||||
This makes it easier to identify unexpected events during a switchover operation, such as
|
||||
the demotion candidate being unexpectedly restarted by an external process.
|
||||
</para>
|
||||
<para>
|
||||
Note that the output of the <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>
|
||||
operation on the demotion candidate will now be logged to a temporary file on that node;
|
||||
the location of the file will be reported in the error message, if one is emitted.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
@@ -55,7 +60,33 @@
|
||||
<varname>repmgrd_exit_on_inactive_node</varname> to <literal>true</literal>.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
|
||||
emit rejoin target note information as <literal>NOTICE</literal>.
|
||||
</para>
|
||||
<para>
|
||||
This makes it clearer what &repmgr; is trying to do.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<link linkend="repmgr-node-check">repmgr node check</link>:
|
||||
option <option>--repmgrd</option> added to check &repmgrd;
|
||||
status.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
Add <literal>%p</literal> <link linkend="event-notifications">event notification parameter</link>
|
||||
providing the node ID of the former primary for the <literal>repmgrd_failover_promote</literal> event.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</para>
|
||||
</sect2>
|
||||
|
||||
@@ -63,31 +94,6 @@
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgrd;: rename internal shared library functions to minimize the
|
||||
risk of clashes with other shared libraries.
|
||||
</para>
|
||||
<para>
|
||||
This does not affect user-facing SQL functions.
|
||||
</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="release-5.2.2">
|
||||
<title>Release 5.2.2</title>
|
||||
<para><emphasis>??? ? ???, 2021</emphasis></para>
|
||||
<para>
|
||||
&repmgr; 5.2.2 is a minor release.
|
||||
</para>
|
||||
<sect2>
|
||||
<title>Bug fixes</title>
|
||||
<para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone</link></command>:
|
||||
@@ -99,23 +105,24 @@
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
<command><link linkend="repmgr-node-rejoin">repmgr node rejoin</link></command>:
|
||||
emit rejoin target note information as <literal>NOTICE</literal>.
|
||||
</para>
|
||||
<para>
|
||||
This makes it clearer what &repmgr; is trying to do.
|
||||
</para>
|
||||
</listitem>
|
||||
<para>
|
||||
&repmgrd;: rename internal shared library functions to minimize the
|
||||
risk of clashes with other shared libraries.
|
||||
</para>
|
||||
<para>
|
||||
This does not affect user-facing SQL functions. However an upgrade
|
||||
of the installed extension version is required.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgrd;: ensure short option <option>-s</option> is accepted.
|
||||
</para>
|
||||
</listitem>
|
||||
|
||||
</itemizedlist>
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
@@ -95,7 +95,8 @@
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The following parameters are provided for a subset of event notifications:
|
||||
The following parameters are provided for a subset of event notifications; their meaning may
|
||||
change according to context:
|
||||
</para>
|
||||
|
||||
<variablelist>
|
||||
@@ -108,6 +109,9 @@
|
||||
<para>
|
||||
node ID of the demoted primary (<xref linkend="repmgr-standby-switchover"/> only)
|
||||
</para>
|
||||
<para>
|
||||
node ID of the former primary (<literal>repmgrd_failover_promote</literal> only)
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
@@ -133,7 +137,7 @@
|
||||
|
||||
<para>
|
||||
The values provided for <literal>%c</literal> and <literal>%a</literal>
|
||||
will probably contain spaces, so should always be quoted.
|
||||
may contain spaces, so should always be quoted.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
||||
@@ -112,10 +112,9 @@
|
||||
</thead>
|
||||
|
||||
<tbody>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 5.2
|
||||
&repmgr; 5.3
|
||||
</entry>
|
||||
<entry>
|
||||
YES
|
||||
@@ -123,6 +122,21 @@
|
||||
<entry>
|
||||
<link linkend="release-current">&repmgrversion;</link> (&releasedate;)
|
||||
</entry>
|
||||
<entry>
|
||||
9.4, 9.5, 9.6, 10, 11, 12, 13, 14
|
||||
</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
&repmgr; 5.2
|
||||
</entry>
|
||||
<entry>
|
||||
NO
|
||||
</entry>
|
||||
<entry>
|
||||
<link linkend="release-5.2.0">5.2.0</link> (2020-10-22)
|
||||
</entry>
|
||||
<entry>
|
||||
9.4, 9.5, 9.6, 10, 11, 12, 13
|
||||
</entry>
|
||||
|
||||
@@ -125,12 +125,29 @@
|
||||
is correctly configured.
|
||||
</simpara>
|
||||
</listitem>
|
||||
|
||||
|
||||
</itemizedlist>
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
<refsect1>
|
||||
<title>repmgrd</title>
|
||||
<para>
|
||||
A separate check is available to verify whether &repmgrd; is running,
|
||||
This is not included in the general output, as this does not
|
||||
per-se constitute a check of the node's replication status.
|
||||
</para>
|
||||
<itemizedlist spacing="compact" mark="bullet">
|
||||
<listitem>
|
||||
<simpara>
|
||||
<option>--repmgrd</option>: checks whether &repmgrd; is running.
|
||||
If &repmgrd; is running but paused, status <literal>1</literal>
|
||||
(<literal>WARNING</literal>) is returned.
|
||||
</simpara>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
<title>Additional checks</title>
|
||||
<para>
|
||||
|
||||
@@ -1079,6 +1079,29 @@ REPMGRD_OPTS="--daemonize=false"
|
||||
</para>
|
||||
|
||||
</sect2>
|
||||
|
||||
<sect2 id="repmgrd-daemon-monitoring">
|
||||
<title>repmgrd daemon monitoring</title>
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>monitoring</secondary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>monitoring</primary>
|
||||
<secondary>repmgrd</secondary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
The command <command><link linkend="repmgr-service-status">repmgr service status</link></command>
|
||||
provides an overview of the &repmgrd; daemon status (including pause status)
|
||||
on all nodes in the cluster.
|
||||
</para>
|
||||
<para>
|
||||
From &repmgr; 5.3, <command><link linkend="repmgr-node-check">repmgr node check --repmgrd</link></command>
|
||||
can be used to check the status of &repmgrd; (including pause status)
|
||||
on the local node.
|
||||
</para>
|
||||
</sect2>
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-connection-settings">
|
||||
|
||||
245
repmgr--unpackaged--5.3.sql
Normal file
245
repmgr--unpackaged--5.3.sql
Normal file
@@ -0,0 +1,245 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||
|
||||
-- extract the current schema name
|
||||
-- NOTE: this assumes there will be only one schema matching 'repmgr_%';
|
||||
-- user is responsible for ensuring this is the case
|
||||
|
||||
CREATE TEMPORARY TABLE repmgr_old_schema (schema_name TEXT);
|
||||
INSERT INTO repmgr_old_schema (schema_name)
|
||||
SELECT nspname AS schema_name
|
||||
FROM pg_catalog.pg_namespace
|
||||
WHERE nspname LIKE 'repmgr_%'
|
||||
LIMIT 1;
|
||||
|
||||
-- move old objects into new schema
|
||||
DO $repmgr$
|
||||
DECLARE
|
||||
old_schema TEXT;
|
||||
BEGIN
|
||||
SELECT schema_name FROM repmgr_old_schema
|
||||
INTO old_schema;
|
||||
EXECUTE format('ALTER TABLE %I.repl_nodes SET SCHEMA repmgr', old_schema);
|
||||
EXECUTE format('ALTER TABLE %I.repl_events SET SCHEMA repmgr', old_schema);
|
||||
EXECUTE format('ALTER TABLE %I.repl_monitor SET SCHEMA repmgr', old_schema);
|
||||
EXECUTE format('DROP VIEW IF EXISTS %I.repl_show_nodes', old_schema);
|
||||
EXECUTE format('DROP VIEW IF EXISTS %I.repl_status', old_schema);
|
||||
END$repmgr$;
|
||||
|
||||
-- convert "repmgr_$cluster.repl_nodes" to "repmgr.nodes"
|
||||
CREATE TABLE repmgr.nodes (
|
||||
node_id INTEGER PRIMARY KEY,
|
||||
upstream_node_id INTEGER NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
|
||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
node_name TEXT NOT NULL,
|
||||
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||
location TEXT NOT NULL DEFAULT 'default',
|
||||
priority INT NOT NULL DEFAULT 100,
|
||||
conninfo TEXT NOT NULL,
|
||||
repluser VARCHAR(63) NOT NULL,
|
||||
slot_name TEXT NULL,
|
||||
config_file TEXT NOT NULL
|
||||
);
|
||||
|
||||
INSERT INTO repmgr.nodes
|
||||
(node_id, upstream_node_id, active, node_name, type, location, priority, conninfo, repluser, slot_name, config_file)
|
||||
SELECT id, upstream_node_id, active, name,
|
||||
CASE WHEN type = 'master' THEN 'primary' ELSE type END,
|
||||
'default', priority, conninfo, 'unknown', slot_name, 'unknown'
|
||||
FROM repmgr.repl_nodes
|
||||
ORDER BY id;
|
||||
|
||||
|
||||
-- convert "repmgr_$cluster.repl_event" to "event"
|
||||
|
||||
CREATE TABLE repmgr.events (
|
||||
node_id INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
details TEXT NULL
|
||||
);
|
||||
|
||||
INSERT INTO repmgr.events
|
||||
(node_id, event, successful, event_timestamp, details)
|
||||
SELECT node_id, event, successful, event_timestamp, details
|
||||
FROM repmgr.repl_events;
|
||||
|
||||
-- create new table "repmgr.voting_term"
|
||||
CREATE TABLE repmgr.voting_term (
|
||||
term INT NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX voting_term_restrict
|
||||
ON repmgr.voting_term ((TRUE));
|
||||
|
||||
CREATE RULE voting_term_delete AS
|
||||
ON DELETE TO repmgr.voting_term
|
||||
DO INSTEAD NOTHING;
|
||||
|
||||
INSERT INTO repmgr.voting_term (term) VALUES (1);
|
||||
|
||||
-- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"
|
||||
|
||||
CREATE TABLE repmgr.monitoring_history (
|
||||
primary_node_id INTEGER NOT NULL,
|
||||
standby_node_id INTEGER NOT NULL,
|
||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||
last_wal_primary_location PG_LSN NOT NULL,
|
||||
last_wal_standby_location PG_LSN,
|
||||
replication_lag BIGINT NOT NULL,
|
||||
apply_lag BIGINT NOT NULL
|
||||
);
|
||||
|
||||
INSERT INTO repmgr.monitoring_history
|
||||
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||
SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location::pg_lsn, last_wal_standby_location::pg_lsn, replication_lag, apply_lag
|
||||
FROM repmgr.repl_monitor;
|
||||
|
||||
CREATE INDEX idx_monitoring_history_time
|
||||
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||
|
||||
CREATE VIEW repmgr.show_nodes AS
|
||||
SELECT n.node_id,
|
||||
n.node_name,
|
||||
n.active,
|
||||
n.upstream_node_id,
|
||||
un.node_name AS upstream_node_name,
|
||||
n.type,
|
||||
n.priority,
|
||||
n.conninfo
|
||||
FROM repmgr.nodes n
|
||||
LEFT JOIN repmgr.nodes un
|
||||
ON un.node_id = n.upstream_node_id;
|
||||
|
||||
|
||||
/* ================= */
|
||||
/* repmgrd functions */
|
||||
/* ================= */
|
||||
|
||||
/* monitoring functions */
|
||||
|
||||
CREATE FUNCTION set_local_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgr_set_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_local_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'repmgr_get_local_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_set_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'repmgr_standby_set_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION standby_get_last_updated()
|
||||
RETURNS TIMESTAMP WITH TIME ZONE
|
||||
AS 'MODULE_PATHNAME', 'repmgr_standby_get_last_updated'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_upstream_last_seen(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgr_set_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_upstream_last_seen()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'repmgr_get_upstream_last_seen'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_upstream_node_id()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'repmgr_get_upstream_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_upstream_node_id(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgr_set_upstream_node_id'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
/* failover functions */
|
||||
|
||||
CREATE FUNCTION notify_follow_primary(INT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgr_notify_follow_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_new_primary()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'repmgr_get_new_primary'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION reset_voting_status()
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgr_reset_voting_status'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_repmgrd_pidfile()
|
||||
RETURNS TEXT
|
||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||
LANGUAGE C CALLED ON NULL INPUT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_running()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||
RETURNS VOID
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION repmgrd_is_paused()
|
||||
RETURNS BOOL
|
||||
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
CREATE FUNCTION get_wal_receiver_pid()
|
||||
RETURNS INT
|
||||
AS 'MODULE_PATHNAME', 'repmgr_get_wal_receiver_pid'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
|
||||
/* views */
|
||||
|
||||
CREATE VIEW repmgr.replication_status AS
|
||||
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||
n.type AS node_type, n.active, last_monitor_time,
|
||||
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||
m.last_wal_standby_location,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||
CASE WHEN n.type='standby' THEN
|
||||
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||
ELSE NULL
|
||||
END AS replication_time_lag,
|
||||
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||
FROM repmgr.monitoring_history m
|
||||
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||
);
|
||||
|
||||
|
||||
|
||||
/* drop old tables */
|
||||
DROP TABLE repmgr.repl_nodes;
|
||||
DROP TABLE repmgr.repl_monitor;
|
||||
DROP TABLE repmgr.repl_events;
|
||||
|
||||
-- remove temporary table
|
||||
DROP TABLE repmgr_old_schema;
|
||||
@@ -35,6 +35,7 @@
|
||||
static bool copy_file(const char *src_file, const char *dest_file);
|
||||
static void format_archive_dir(PQExpBufferData *archive_dir);
|
||||
static t_server_action parse_server_action(const char *action);
|
||||
static const char *output_repmgrd_status(CheckStatus status);
|
||||
|
||||
static void exit_optformat_error(const char *error, int errcode);
|
||||
|
||||
@@ -52,9 +53,11 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
|
||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
|
||||
|
||||
|
||||
/*
|
||||
* NODE STATUS
|
||||
*
|
||||
@@ -941,6 +944,16 @@ do_node_check(void)
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.repmgrd == true)
|
||||
{
|
||||
return_code = do_node_check_repmgrd(conn,
|
||||
runtime_options.output_mode,
|
||||
&node_info,
|
||||
NULL);
|
||||
PQfinish(conn);
|
||||
exit(return_code);
|
||||
}
|
||||
|
||||
if (runtime_options.replication_config_owner == true)
|
||||
{
|
||||
return_code = do_node_check_replication_config_owner(conn,
|
||||
@@ -2024,7 +2037,6 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
CheckStatus
|
||||
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||
{
|
||||
@@ -2159,6 +2171,53 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
||||
return status;
|
||||
}
|
||||
|
||||
CheckStatus
|
||||
do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||
{
|
||||
CheckStatus status = CHECK_STATUS_OK;
|
||||
|
||||
if (mode == OM_CSV && list_output == NULL)
|
||||
{
|
||||
log_error(_("--csv output not provided with --repmgrd option"));
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
status = get_repmgrd_status(conn);
|
||||
switch (mode)
|
||||
{
|
||||
case OM_OPTFORMAT:
|
||||
printf("--repmgrd=%s\n",
|
||||
output_check_status(status));
|
||||
break;
|
||||
case OM_NAGIOS:
|
||||
printf("REPMGRD %s: %s\n",
|
||||
output_check_status(status),
|
||||
output_repmgrd_status(status));
|
||||
|
||||
break;
|
||||
case OM_CSV:
|
||||
case OM_TEXT:
|
||||
if (list_output != NULL)
|
||||
{
|
||||
check_status_list_set(list_output,
|
||||
"repmgrd",
|
||||
status,
|
||||
output_repmgrd_status(status));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("%s (%s)\n",
|
||||
output_check_status(status),
|
||||
output_repmgrd_status(status));
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is not included in the general list output
|
||||
*/
|
||||
@@ -3570,6 +3629,25 @@ copy_file(const char *src_file, const char *dest_file)
|
||||
}
|
||||
|
||||
|
||||
static const char *
|
||||
output_repmgrd_status(CheckStatus status)
|
||||
{
|
||||
switch (status)
|
||||
{
|
||||
case CHECK_STATUS_OK:
|
||||
return "repmgrd running";
|
||||
case CHECK_STATUS_WARNING:
|
||||
return "repmgrd running but paused";
|
||||
case CHECK_STATUS_CRITICAL:
|
||||
return "repmgrd not running";
|
||||
case CHECK_STATUS_UNKNOWN:
|
||||
return "repmgrd status unknown";
|
||||
}
|
||||
|
||||
return "UNKNOWN";
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
do_node_help(void)
|
||||
{
|
||||
@@ -3612,6 +3690,7 @@ do_node_help(void)
|
||||
printf(_(" --role check node has expected role\n"));
|
||||
printf(_(" --slots check for inactive replication slots\n"));
|
||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||
printf(_(" --repmgrd check if repmgrd is running\n"));
|
||||
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
||||
|
||||
puts("");
|
||||
|
||||
@@ -120,6 +120,7 @@ typedef struct
|
||||
bool missing_slots;
|
||||
bool has_passfile;
|
||||
bool replication_connection;
|
||||
bool repmgrd;
|
||||
bool data_directory_config;
|
||||
bool replication_config_owner;
|
||||
bool db_connection;
|
||||
@@ -175,7 +176,7 @@ typedef struct
|
||||
/* "node status" options */ \
|
||||
false, \
|
||||
/* "node check" options */ \
|
||||
false, false, false, false, false, false, false, false, false, false, false, false, \
|
||||
false, false, false, false, false, false, false, false, false, false, false, false, false, \
|
||||
/* "node rejoin" options */ \
|
||||
"", \
|
||||
/* "node service" options */ \
|
||||
|
||||
@@ -549,6 +549,10 @@ main(int argc, char **argv)
|
||||
runtime_options.data_directory_config = true;
|
||||
break;
|
||||
|
||||
case OPT_REPMGRD:
|
||||
runtime_options.repmgrd = true;
|
||||
break;
|
||||
|
||||
case OPT_REPLICATION_CONFIG_OWNER:
|
||||
runtime_options.replication_config_owner = true;
|
||||
break;
|
||||
|
||||
@@ -100,6 +100,7 @@
|
||||
#define OPT_DB_CONNECTION 1047
|
||||
#define OPT_VERIFY_BACKUP 1048
|
||||
#define OPT_RECOVERY_MIN_APPLY_DELAY 1049
|
||||
#define OPT_REPMGRD 1050
|
||||
|
||||
/* These options are for internal use only */
|
||||
#define OPT_CONFIG_ARCHIVE_DIR 2001
|
||||
@@ -193,6 +194,7 @@ static struct option long_options[] =
|
||||
{"role", no_argument, NULL, OPT_ROLE},
|
||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
|
||||
{"repmgrd", no_argument, NULL, OPT_REPMGRD},
|
||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#define REPMGR_VERSION_DATE ""
|
||||
#define REPMGR_VERSION "5.3dev"
|
||||
#define REPMGR_VERSION "5.3.0"
|
||||
#define REPMGR_VERSION_NUM 50300
|
||||
#define REPMGR_EXTENSION_VERSION "5.3"
|
||||
#define REPMGR_EXTENSION_NUM 50300
|
||||
#define REPMGR_RELEASE_DATE "2021-XX-XX"
|
||||
#define REPMGR_RELEASE_DATE "2021-10-12"
|
||||
#define PG_ACTUAL_VERSION_NUM
|
||||
|
||||
@@ -172,37 +172,85 @@ void
|
||||
do_physical_node_check(PGconn *conn)
|
||||
{
|
||||
/*
|
||||
* Check if node record is active - if not, and `failover=automatic`, the
|
||||
* node won't be considered as a promotion candidate; this often happens
|
||||
* when a failed primary is recloned and the node was not re-registered,
|
||||
* giving the impression failover capability is there when it's not. In
|
||||
* this case abort with an error and a hint about registering.
|
||||
* If node record is "inactive"; if not, attempt to set it to "active".
|
||||
*
|
||||
* If `failover=manual`, repmgrd can continue to passively monitor the
|
||||
* node, but we should nevertheless issue a warning and the same hint.
|
||||
* Usually it will have become inactive due to e.g. a standby being shut down
|
||||
* while repmgrd was running in an unpaused state. In this case it's
|
||||
* perfectly reasonable to automatically mark the node as "active".
|
||||
*/
|
||||
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
char *hint = "Check that \"repmgr (primary|standby) register\" was executed for this node";
|
||||
RecoveryType recovery_type = get_recovery_type(conn);
|
||||
|
||||
/*
|
||||
* If the local node's recovery status is incompatible with its registered
|
||||
* status, e.g. registered as primary but running as a standby, refuse to start.
|
||||
*
|
||||
* This typically happens when a failed primary is recloned but the node was not
|
||||
* re-registered, leaving the cluster in a potentially ambiguous state. In
|
||||
* this case it would not be possible or desirable to attempt to set the
|
||||
* node to active; the user should ensure the cluster is in the correct state.
|
||||
*/
|
||||
if (recovery_type != RECTYPE_UNKNOWN && local_node_info.type != UNKNOWN)
|
||||
{
|
||||
bool require_reregister = false;
|
||||
PQExpBufferData event_details;
|
||||
initPQExpBuffer(&event_details);
|
||||
|
||||
if (recovery_type == RECTYPE_STANDBY && local_node_info.type != STANDBY)
|
||||
{
|
||||
appendPQExpBuffer(&event_details,
|
||||
_("node is registered as a %s but running as a standby"),
|
||||
get_node_type_string(local_node_info.type));
|
||||
|
||||
require_reregister = true;
|
||||
}
|
||||
else if (recovery_type == RECTYPE_PRIMARY && local_node_info.type == STANDBY)
|
||||
{
|
||||
log_error(_("node is registered as a standby but running as a %s"), get_node_type_string(local_node_info.type));
|
||||
require_reregister = true;
|
||||
}
|
||||
|
||||
if (require_reregister == true)
|
||||
{
|
||||
log_error("%s", event_details.data);
|
||||
log_hint(_("%s"), hint);
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_start",
|
||||
false,
|
||||
event_details.data);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to set node record active (unless explicitly configured not to)
|
||||
*/
|
||||
log_notice(_("setting node record for node \"%s\" (ID: %i) to \"active\""),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
|
||||
if (config_file_options.repmgrd_exit_on_inactive_node == false)
|
||||
{
|
||||
PGconn *primary_conn = get_primary_connection_quiet(conn, NULL, NULL);
|
||||
PGconn *primary_conn = get_primary_connection(conn, NULL, NULL);
|
||||
bool success = true;
|
||||
|
||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to the primary node to activate the node record"));
|
||||
success = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_notice(_("setting node record for node \"%s\" (ID: %i) to \"active\""),
|
||||
local_node_info.node_name,
|
||||
local_node_info.node_id);
|
||||
success = update_node_record_set_active(primary_conn, local_node_info.node_id, true);
|
||||
PQfinish(primary_conn);
|
||||
}
|
||||
@@ -210,33 +258,37 @@ do_physical_node_check(PGconn *conn)
|
||||
if (success == true)
|
||||
{
|
||||
local_node_info.active = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
switch (config_file_options.failover)
|
||||
/*
|
||||
* Corner-case where it was not possible to set the node to "active"
|
||||
*/
|
||||
if (local_node_info.active == false)
|
||||
{
|
||||
switch (config_file_options.failover)
|
||||
{
|
||||
/* "failover" is an enum, all values should be covered here */
|
||||
|
||||
/* "failover" is an enum, all values should be covered here */
|
||||
case FAILOVER_AUTOMATIC:
|
||||
log_error(_("this node is marked as inactive and cannot be used as a failover target"));
|
||||
log_hint(_("%s"), hint);
|
||||
|
||||
case FAILOVER_AUTOMATIC:
|
||||
log_error(_("this node is marked as inactive and cannot be used as a failover target"));
|
||||
log_hint(_("%s"), hint);
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_start",
|
||||
false,
|
||||
"node is inactive and cannot be used as a failover target");
|
||||
|
||||
create_event_notification(NULL,
|
||||
&config_file_options,
|
||||
config_file_options.node_id,
|
||||
"repmgrd_shutdown",
|
||||
false,
|
||||
"node is inactive and cannot be used as a failover target");
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
break;
|
||||
|
||||
terminate(ERR_BAD_CONFIG);
|
||||
break;
|
||||
|
||||
case FAILOVER_MANUAL:
|
||||
log_warning(_("this node is marked as inactive and will be passively monitored only"));
|
||||
log_hint(_("%s"), hint);
|
||||
break;
|
||||
case FAILOVER_MANUAL:
|
||||
log_warning(_("this node is marked as inactive and will be passively monitored only"));
|
||||
log_hint(_("%s"), hint);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1842,6 +1894,7 @@ monitor_streaming_standby(void)
|
||||
int former_upstream_node_id = local_node_info.upstream_node_id;
|
||||
NodeInfoList sibling_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||
PQExpBufferData event_details;
|
||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
||||
|
||||
update_node_record_set_primary(local_conn, local_node_info.node_id);
|
||||
record_status = get_node_record(local_conn, local_node_info.node_id, &local_node_info);
|
||||
@@ -1854,12 +1907,16 @@ monitor_streaming_standby(void)
|
||||
initPQExpBuffer(&event_details);
|
||||
appendPQExpBufferStr(&event_details,
|
||||
_("promotion command failed but promotion completed successfully"));
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"repmgrd_failover_promote",
|
||||
true,
|
||||
event_details.data);
|
||||
|
||||
event_info.node_id = former_upstream_node_id;
|
||||
|
||||
create_event_notification_extended(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"repmgrd_failover_promote",
|
||||
true,
|
||||
event_details.data,
|
||||
&event_info);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
|
||||
@@ -3703,6 +3760,7 @@ promote_self(void)
|
||||
|
||||
{
|
||||
PQExpBufferData event_details;
|
||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
||||
|
||||
/* update own internal node record */
|
||||
record_status = get_node_record(local_conn, local_node_info.node_id, &local_node_info);
|
||||
@@ -3719,13 +3777,16 @@ promote_self(void)
|
||||
failed_primary.node_name,
|
||||
failed_primary.node_id);
|
||||
|
||||
event_info.node_id = failed_primary.node_id;
|
||||
|
||||
/* local_conn is now the primary connection */
|
||||
create_event_notification(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"repmgrd_failover_promote",
|
||||
true,
|
||||
event_details.data);
|
||||
create_event_notification_extended(local_conn,
|
||||
&config_file_options,
|
||||
local_node_info.node_id,
|
||||
"repmgrd_failover_promote",
|
||||
true,
|
||||
event_details.data,
|
||||
&event_info);
|
||||
|
||||
termPQExpBuffer(&event_details);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user