mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 17:06:29 +00:00
Provisionally add view "repmgr.replication_status"
This commit is contained in:
50
README.md
50
README.md
@@ -115,8 +115,8 @@ tables:
|
|||||||
- `repmgr.events`: records events of interest
|
- `repmgr.events`: records events of interest
|
||||||
- `repmgr.nodes`: connection and status information for each server in the
|
- `repmgr.nodes`: connection and status information for each server in the
|
||||||
replication cluster
|
replication cluster
|
||||||
- `repmgr.monitor`: historical standby monitoring information written by `repmgrd`
|
- `repmgr.monitoring_history`: historical standby monitoring information written by `repmgrd`
|
||||||
XXX not yet implemented
|
|
||||||
|
|
||||||
views:
|
views:
|
||||||
- `repmgr.show_nodes`: based on the table `repl_nodes`, additionally showing the
|
- `repmgr.show_nodes`: based on the table `repmgr.nodes`, additionally showing the
|
||||||
@@ -323,6 +323,52 @@ The following replication settings may need to be adjusted:
|
|||||||
# wal_keep_segments = 5000
|
# wal_keep_segments = 5000
|
||||||
|
|
||||||
|
|
||||||
|
### Monitoring with `repmgrd`
|
||||||
|
|
||||||
|
When `repmgrd` is running with the option `monitoring_history=true`, it will
|
||||||
|
constantly write standby node status information to the `monitoring_history`
|
||||||
|
table, providing a near-real-time overview of replication status on all nodes
|
||||||
|
in the cluster.
|
||||||
|
|
||||||
|
The view `replication_status` shows the most recent state for each node, e.g.:
|
||||||
|
|
||||||
|
repmgr=# SELECT * FROM repmgr.replication_status;
|
||||||
|
-[ RECORD 1 ]-------------+-----------------------------
|
||||||
|
primary_node | 1
|
||||||
|
standby_node | 2
|
||||||
|
standby_name | node2
|
||||||
|
node_type | standby
|
||||||
|
active | t
|
||||||
|
last_monitor_time | 2016-01-05 14:02:34.51713+09
|
||||||
|
last_wal_primary_location | 0/3012AF0
|
||||||
|
last_wal_standby_location | 0/3012AF0
|
||||||
|
replication_lag | 0 bytes
|
||||||
|
replication_time_lag | 00:00:03.463085
|
||||||
|
apply_lag | 0 bytes
|
||||||
|
communication_time_lag | 00:00:00.955385
|
||||||
|
|
||||||
|
The interval in which monitoring history is written is controlled by the
|
||||||
|
configuration parameter `monitor_interval_secs`; default is 2.
|
||||||
|
|
||||||
|
As this can generate a large amount of monitoring data in the `monitoring_history`
|
||||||
|
table, it's advisable to regularly purge historical data with
|
||||||
|
`repmgr cluster cleanup`; use the `-k/--keep-history` option to specify how
|
||||||
|
many days' worth of data should be retained. *XXX not yet implemented*
|
||||||
|
|
||||||
|
It's possible to use `repmgrd` to provide monitoring only for some or all
|
||||||
|
nodes by setting `failover=manual` in the node's `repmgr.conf` file. In the
|
||||||
|
event of the node's upstream failing, no failover action will be taken
|
||||||
|
and the node will require manual intervention to be reattached to replication.
|
||||||
|
If this occurs, an event notification `standby_disconnect_manual` will be
|
||||||
|
created.
|
||||||
|
|
||||||
|
Note that when a standby node is not streaming directly from its upstream
|
||||||
|
node, e.g. recovering WAL from an archive, `apply_lag` will always appear as
|
||||||
|
`0 bytes`.
|
||||||
|
|
||||||
|
XXX ALTER TABLE monitoring_history SET UNLOGGED ;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference
|
Reference
|
||||||
---------
|
---------
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
CREATE TABLE nodes (
|
CREATE TABLE repmgr.nodes (
|
||||||
node_id INTEGER PRIMARY KEY,
|
node_id INTEGER PRIMARY KEY,
|
||||||
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
@@ -15,7 +15,7 @@ CREATE TABLE nodes (
|
|||||||
config_file TEXT NOT NULL
|
config_file TEXT NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE events (
|
CREATE TABLE repmgr.events (
|
||||||
node_id INTEGER NOT NULL,
|
node_id INTEGER NOT NULL,
|
||||||
event TEXT NOT NULL,
|
event TEXT NOT NULL,
|
||||||
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
@@ -23,7 +23,7 @@ CREATE TABLE events (
|
|||||||
details TEXT NULL
|
details TEXT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE monitoring_history (
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
primary_node_id INTEGER NOT NULL,
|
primary_node_id INTEGER NOT NULL,
|
||||||
standby_node_id INTEGER NOT NULL,
|
standby_node_id INTEGER NOT NULL,
|
||||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
@@ -34,9 +34,9 @@ CREATE TABLE monitoring_history (
|
|||||||
apply_lag BIGINT NOT NULL
|
apply_lag BIGINT NOT NULL
|
||||||
);
|
);
|
||||||
CREATE INDEX idx_monitoring_history_time
|
CREATE INDEX idx_monitoring_history_time
|
||||||
ON monitoring_history (last_monitor_time, standby_node_id);
|
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
CREATE VIEW show_nodes AS
|
CREATE VIEW repmgr.show_nodes AS
|
||||||
SELECT n.node_id,
|
SELECT n.node_id,
|
||||||
n.node_name,
|
n.node_name,
|
||||||
n.active,
|
n.active,
|
||||||
@@ -45,10 +45,30 @@ CREATE VIEW show_nodes AS
|
|||||||
n.type,
|
n.type,
|
||||||
n.priority,
|
n.priority,
|
||||||
n.conninfo
|
n.conninfo
|
||||||
FROM nodes n
|
FROM repmgr.nodes n
|
||||||
LEFT JOIN nodes un
|
LEFT JOIN repmgr.nodes un
|
||||||
ON un.node_id = n.upstream_node_id;
|
ON un.node_id = n.upstream_node_id;
|
||||||
|
|
||||||
|
-- repmgr.repmgr_get_last_updated()
|
||||||
|
-- View "repmgr.replication_status": for each standby_node_id present in
-- "repmgr.monitoring_history", expose its latest monitoring entry (the one
-- with the greatest last_monitor_time) together with the matching
-- "repmgr.nodes" record.
--
-- last_wal_primary_location, replication_lag, replication_time_lag and
-- apply_lag are NULL unless the node's type is 'standby'. The two byte-lag
-- columns are rendered as text via pg_size_pretty().
CREATE VIEW repmgr.replication_status AS
  SELECT m.primary_node_id,
         m.standby_node_id,
         n.node_name AS standby_name,
         n.type AS node_type,
         n.active,
         m.last_monitor_time,
         CASE WHEN n.type = 'standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
         m.last_wal_standby_location,
         CASE WHEN n.type = 'standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
         -- a standby with zero byte lag reports zero time lag, whatever its last_apply_time
         CASE WHEN n.type = 'standby' THEN
           CASE WHEN m.replication_lag > 0 THEN AGE(NOW(), m.last_apply_time) ELSE '0'::INTERVAL END
         ELSE NULL
         END AS replication_time_lag,
         CASE WHEN n.type = 'standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
         -- when queried on a server in recovery this is AGE(NOW(), NOW()), i.e. zero
         AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN NOW() ELSE m.last_monitor_time END) AS communication_time_lag
    FROM repmgr.monitoring_history m
    JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
   WHERE (m.standby_node_id, m.last_monitor_time) IN (
           -- latest monitoring timestamp per standby
           SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
             FROM repmgr.monitoring_history m1
            GROUP BY m1.standby_node_id
         );
|
||||||
|
|
||||||
/* repmgrd functions */
|
/* repmgrd functions */
|
||||||
|
|
||||||
CREATE FUNCTION request_vote(INT,INT)
|
CREATE FUNCTION request_vote(INT,INT)
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ BEGIN
|
|||||||
END$repmgr$;
|
END$repmgr$;
|
||||||
|
|
||||||
-- convert "repmgr_$cluster.repl_nodes" to "repmgr.nodes"
|
-- convert "repmgr_$cluster.repl_nodes" to "repmgr.nodes"
|
||||||
CREATE TABLE nodes (
|
CREATE TABLE repmgr.nodes (
|
||||||
node_id INTEGER PRIMARY KEY,
|
node_id INTEGER PRIMARY KEY,
|
||||||
upstream_node_id INTEGER NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
|
upstream_node_id INTEGER NULL REFERENCES repmgr.nodes (node_id) DEFERRABLE,
|
||||||
active BOOLEAN NOT NULL DEFAULT TRUE,
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
@@ -41,22 +41,22 @@ CREATE TABLE nodes (
|
|||||||
config_file TEXT NOT NULL
|
config_file TEXT NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO nodes
|
INSERT INTO repmgr.nodes
|
||||||
(node_id, upstream_node_id, active, node_name, type, location, priority, conninfo, repluser, slot_name, config_file)
|
(node_id, upstream_node_id, active, node_name, type, location, priority, conninfo, repluser, slot_name, config_file)
|
||||||
SELECT id, upstream_node_id, active, name,
|
SELECT id, upstream_node_id, active, name,
|
||||||
CASE WHEN type = 'master' THEN 'primary' ELSE type END,
|
CASE WHEN type = 'master' THEN 'primary' ELSE type END,
|
||||||
'default', priority, conninfo, 'unknown', slot_name, 'unknown'
|
'default', priority, conninfo, 'unknown', slot_name, 'unknown'
|
||||||
FROM repl_nodes
|
FROM repmgr.repl_nodes
|
||||||
ORDER BY id;
|
ORDER BY id;
|
||||||
|
|
||||||
|
|
||||||
-- convert "repmgr_$cluster.repl_event" to "event"
|
-- convert "repmgr_$cluster.repl_events" to "repmgr.events"
|
||||||
|
|
||||||
ALTER TABLE repl_events RENAME TO events;
|
-- RENAME TO accepts only a bare table name; the table stays in its current schema
ALTER TABLE repmgr.repl_events RENAME TO events;
|
||||||
|
|
||||||
-- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"
|
-- convert "repmgr_$cluster.repl_monitor" to "monitoring_history"
|
||||||
|
|
||||||
CREATE TABLE monitoring_history (
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
primary_node_id INTEGER NOT NULL,
|
primary_node_id INTEGER NOT NULL,
|
||||||
standby_node_id INTEGER NOT NULL,
|
standby_node_id INTEGER NOT NULL,
|
||||||
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
@@ -67,20 +67,20 @@ CREATE TABLE monitoring_history (
|
|||||||
apply_lag BIGINT NOT NULL
|
apply_lag BIGINT NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO monitoring_history
|
INSERT INTO repmgr.monitoring_history
|
||||||
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
(primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
|
||||||
SELECT primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag
|
SELECT primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag
|
||||||
FROM repl_monitor;
|
FROM repmgr.repl_monitor;
|
||||||
|
|
||||||
CREATE INDEX idx_monitoring_history_time
|
CREATE INDEX idx_monitoring_history_time
|
||||||
ON monitoring_history (last_monitor_time, standby_node_id);
|
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
|
|
||||||
-- recreate VIEW
|
-- recreate VIEW
|
||||||
|
|
||||||
DROP VIEW IF EXISTS repl_show_nodes;
|
-- schema-qualified so the drop does not depend on the session's search_path
DROP VIEW IF EXISTS repmgr.repl_show_nodes;
|
||||||
|
|
||||||
CREATE VIEW show_nodes AS
|
CREATE VIEW repmgr.show_nodes AS
|
||||||
SELECT n.node_id,
|
SELECT n.node_id,
|
||||||
n.node_name,
|
n.node_name,
|
||||||
n.active,
|
n.active,
|
||||||
@@ -89,17 +89,17 @@ CREATE VIEW show_nodes AS
|
|||||||
n.type,
|
n.type,
|
||||||
n.priority,
|
n.priority,
|
||||||
n.conninfo
|
n.conninfo
|
||||||
FROM nodes n
|
FROM repmgr.nodes n
|
||||||
LEFT JOIN nodes un
|
LEFT JOIN repmgr.nodes un
|
||||||
ON un.node_id = n.upstream_node_id;
|
ON un.node_id = n.upstream_node_id;
|
||||||
|
|
||||||
DROP VIEW IF EXISTS repl_status;
|
DROP VIEW IF EXISTS repmgr.repl_status;
|
||||||
|
|
||||||
-- CREATE VIEW status ... ;
|
-- XXX CREATE VIEW repmgr.replication_status ... ;
|
||||||
|
|
||||||
/* drop old tables */
|
/* drop old tables */
|
||||||
DROP TABLE repl_nodes;
|
DROP TABLE repmgr.repl_nodes;
|
||||||
DROP TABLE repl_monitor;
|
DROP TABLE repmgr.repl_monitor;
|
||||||
|
|
||||||
|
|
||||||
/* repmgrd functions */
|
/* repmgrd functions */
|
||||||
@@ -139,13 +139,11 @@ CREATE FUNCTION reset_voting_status()
|
|||||||
AS '$libdir/repmgr', 'reset_voting_status'
|
AS '$libdir/repmgr', 'reset_voting_status'
|
||||||
LANGUAGE C STRICT;
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
CREATE FUNCTION am_bdr_failover_handler(INT)
|
||||||
RETURNS BOOL
|
RETURNS BOOL
|
||||||
AS '$libdir/repmgr', 'am_bdr_failover_handler'
|
AS '$libdir/repmgr', 'am_bdr_failover_handler'
|
||||||
LANGUAGE C STRICT;
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
CREATE FUNCTION unset_bdr_failover_handler()
|
CREATE FUNCTION unset_bdr_failover_handler()
|
||||||
RETURNS VOID
|
RETURNS VOID
|
||||||
AS '$libdir/repmgr', 'unset_bdr_failover_handler'
|
AS '$libdir/repmgr', 'unset_bdr_failover_handler'
|
||||||
|
|||||||
Reference in New Issue
Block a user