mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 15:16:29 +00:00
Compare commits
172 Commits
v5.0.0
...
REL5_1_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d43270008c | ||
|
|
a99768fdd8 | ||
|
|
2a4e81ef1c | ||
|
|
0ad6aceceb | ||
|
|
53c9eacbc4 | ||
|
|
e93f1c0439 | ||
|
|
7332d0251c | ||
|
|
7006a6d9c3 | ||
|
|
b8677a0fa2 | ||
|
|
992d2e0e49 | ||
|
|
5c71809261 | ||
|
|
22f2ee3050 | ||
|
|
2371f30f8a | ||
|
|
eddf06b60b | ||
|
|
b9874cd751 | ||
|
|
8313944535 | ||
|
|
3bfaa8e722 | ||
|
|
ca42dd563b | ||
|
|
a2f73a5086 | ||
|
|
af144d39cb | ||
|
|
68ad58f5fc | ||
|
|
c76fee98ef | ||
|
|
de634eb593 | ||
|
|
03c2c8cebd | ||
|
|
da7db96e76 | ||
|
|
b4c9064903 | ||
|
|
5abef8e4ed | ||
|
|
0813a31c08 | ||
|
|
4c7be798e0 | ||
|
|
0a7c7ae7ab | ||
|
|
9ce1c2e640 | ||
|
|
5a2399cb25 | ||
|
|
54d33dd50b | ||
|
|
2b0218456a | ||
|
|
2b773480e7 | ||
|
|
a934f19e80 | ||
|
|
9040d53e55 | ||
|
|
cb19311b35 | ||
|
|
9ce7cb7012 | ||
|
|
687ed68ce2 | ||
|
|
8472d99277 | ||
|
|
177b84345d | ||
|
|
555351f8c1 | ||
|
|
1e90d5e018 | ||
|
|
d45e64ca9e | ||
|
|
374f19675d | ||
|
|
ce88f3ec43 | ||
|
|
5acdd69add | ||
|
|
71e23107e7 | ||
|
|
599bab590a | ||
|
|
cd80f265ac | ||
|
|
09f0be8ceb | ||
|
|
59159dede7 | ||
|
|
780453e168 | ||
|
|
8a27c89d18 | ||
|
|
0a2091d5d3 | ||
|
|
447054a630 | ||
|
|
d998cab3d0 | ||
|
|
7f460c88bf | ||
|
|
e59da2d74e | ||
|
|
bffb8fa11b | ||
|
|
d9cb38c7f0 | ||
|
|
f3258c5002 | ||
|
|
5d92c99bb9 | ||
|
|
6895916914 | ||
|
|
e64349e4da | ||
|
|
2e9bc31c8c | ||
|
|
325e3ea541 | ||
|
|
2b06f2d1ae | ||
|
|
304c1391cc | ||
|
|
b09631b3bc | ||
|
|
e561ddc8d3 | ||
|
|
06f0e5e94f | ||
|
|
12adb5e0d1 | ||
|
|
b691a1bd10 | ||
|
|
dd35c22033 | ||
|
|
ddde31b14e | ||
|
|
09a78111f6 | ||
|
|
76cea52755 | ||
|
|
57ba3ef19a | ||
|
|
0ddc9b8bbf | ||
|
|
5722c0a582 | ||
|
|
eb346ac6ae | ||
|
|
0bc0a28378 | ||
|
|
fb5ce720f3 | ||
|
|
7c96afc6fb | ||
|
|
6559258b53 | ||
|
|
9de31428f1 | ||
|
|
10304a1a3b | ||
|
|
63aac64938 | ||
|
|
8f6058c676 | ||
|
|
194b6d0948 | ||
|
|
c6dfe53f03 | ||
|
|
e218422eca | ||
|
|
e02e3dae29 | ||
|
|
6ef722956b | ||
|
|
cebb1249aa | ||
|
|
51a7c31833 | ||
|
|
76af2d9e08 | ||
|
|
3b03edebb6 | ||
|
|
eaee7145f6 | ||
|
|
2bb89d252b | ||
|
|
e782f2d949 | ||
|
|
5d058dc371 | ||
|
|
089b3ecb8b | ||
|
|
b4af80fdec | ||
|
|
cb7bbda021 | ||
|
|
9cf4616af1 | ||
|
|
6f01c54620 | ||
|
|
7ed0a99d70 | ||
|
|
e2a362a171 | ||
|
|
cd7f36a6fd | ||
|
|
ab9c84c655 | ||
|
|
0141bc2be7 | ||
|
|
4d6cff6c42 | ||
|
|
84b824d86a | ||
|
|
a7689ecd78 | ||
|
|
ef30892250 | ||
|
|
3ae6691d34 | ||
|
|
bd8eb82fb9 | ||
|
|
4d4ed3bcd6 | ||
|
|
7fdf2f1778 | ||
|
|
46222cc0ae | ||
|
|
afa88f0514 | ||
|
|
647c0c879e | ||
|
|
3f5d2f6ee9 | ||
|
|
ab6e5ceab3 | ||
|
|
eb1d5c4e93 | ||
|
|
b3c09c48bf | ||
|
|
e2ffeac67d | ||
|
|
4ed72eb901 | ||
|
|
f158e35c13 | ||
|
|
21475b9c70 | ||
|
|
95ee576052 | ||
|
|
ac753c2ba1 | ||
|
|
ce85ba6df5 | ||
|
|
c3aba173ea | ||
|
|
93acdcfda2 | ||
|
|
25fb24eee4 | ||
|
|
220ec7fc96 | ||
|
|
1a9bcddccd | ||
|
|
f0693271d3 | ||
|
|
c23162e787 | ||
|
|
b8f323af5a | ||
|
|
63217e436a | ||
|
|
45b9002e5b | ||
|
|
52f9cd3bae | ||
|
|
cc540a54e5 | ||
|
|
9083f26990 | ||
|
|
5405ae7100 | ||
|
|
aa2674e284 | ||
|
|
b007d5ed4b | ||
|
|
63ddc2d39e | ||
|
|
9976e646cd | ||
|
|
dc11330d58 | ||
|
|
be494f0d5f | ||
|
|
d7fd55be99 | ||
|
|
0574279ccb | ||
|
|
b74f965f54 | ||
|
|
b9e360d5b8 | ||
|
|
047249e980 | ||
|
|
14f46b076e | ||
|
|
52abe309df | ||
|
|
f45b9d7024 | ||
|
|
de67fa2441 | ||
|
|
5f6d970fd9 | ||
|
|
0dce03a5f8 | ||
|
|
5d81e03d2d | ||
|
|
fdb61a1dea | ||
|
|
8a38188c47 | ||
|
|
5dcca6b053 | ||
|
|
a0591afb1e |
@@ -2,7 +2,7 @@ License and Contributions
|
|||||||
=========================
|
=========================
|
||||||
|
|
||||||
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
`repmgr` is licensed under the GPL v3. All of its code and documentation is
|
||||||
Copyright 2010-2019, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
Copyright 2010-2020, 2ndQuadrant Limited. See the files COPYRIGHT and LICENSE for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
The development of repmgr has primarily been sponsored by 2ndQuadrant customers.
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2010-2019, 2ndQuadrant Limited
|
Copyright (c) 2010-2020, 2ndQuadrant Limited
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
|||||||
27
HISTORY
27
HISTORY
@@ -1,4 +1,29 @@
|
|||||||
5.0 2019-10-??
|
5.1.0 2020-04-13
|
||||||
|
repmgr: remove BDR 2.x support
|
||||||
|
repmgr: don't query upstream's data directory (Ian)
|
||||||
|
repmgr: rename --recovery-conf-only to --replication-conf-only (Ian)
|
||||||
|
repmgr: ensure postgresql.auto.conf is created with correct permissions (Ian)
|
||||||
|
repmgr: minimize requirement to check upstream data directory location
|
||||||
|
during "standby clone" (Ian)
|
||||||
|
repmgr: warn about missing pg_rewind prerequisites when executing
|
||||||
|
"standby clone" (Ian)
|
||||||
|
repmgr: add --upstream option to "node check"
|
||||||
|
repmgr: report error code on follow/rejoin failure due to non-available
|
||||||
|
replication slot (Ian)
|
||||||
|
repmgr: ensure "node rejoin" checks for available replication slots (Ian)
|
||||||
|
repmgr: improve "standby switchover" completion checks (Ian)
|
||||||
|
repmgr: add replication configuration file ownership check to
|
||||||
|
"standby switchover" (Ian)
|
||||||
|
repmgr: check the demotion candidate's registered repmgr.conf file can
|
||||||
|
be found (laixiong; GitHub 615)
|
||||||
|
repmgr: consolidate replication connection code (Ian)
|
||||||
|
repmgr: check permissions for "pg_promote()" and fall back to pg_ctl
|
||||||
|
if necessary (Ian)
|
||||||
|
repmgr: in --dry-run mode, display promote command which will be used (Ian)
|
||||||
|
repmgr: enable "service_promote_command" in PostgreSQL 12 (Ian)
|
||||||
|
repmgr: accept option -S/--superuser for "node check"; GitHub #612 (Ian)
|
||||||
|
|
||||||
|
5.0 2019-10-15
|
||||||
general: add PostgreSQL 12 support (Ian)
|
general: add PostgreSQL 12 support (Ian)
|
||||||
general: parse configuration file using flex (Ian)
|
general: parse configuration file using flex (Ian)
|
||||||
repmgr: rename "repmgr daemon ..." commands to "repmgr service ..." (Ian)
|
repmgr: rename "repmgr daemon ..." commands to "repmgr service ..." (Ian)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ EXTENSION = repmgr
|
|||||||
|
|
||||||
DATA = \
|
DATA = \
|
||||||
repmgr--unpackaged--4.0.sql \
|
repmgr--unpackaged--4.0.sql \
|
||||||
|
repmgr--unpackaged--5.1.sql \
|
||||||
repmgr--4.0.sql \
|
repmgr--4.0.sql \
|
||||||
repmgr--4.0--4.1.sql \
|
repmgr--4.0--4.1.sql \
|
||||||
repmgr--4.1.sql \
|
repmgr--4.1.sql \
|
||||||
@@ -21,7 +22,9 @@ DATA = \
|
|||||||
repmgr--4.3--4.4.sql \
|
repmgr--4.3--4.4.sql \
|
||||||
repmgr--4.4.sql \
|
repmgr--4.4.sql \
|
||||||
repmgr--4.4--5.0.sql \
|
repmgr--4.4--5.0.sql \
|
||||||
repmgr--5.0.sql
|
repmgr--5.0.sql \
|
||||||
|
repmgr--5.0--5.1.sql \
|
||||||
|
repmgr--5.1.sql
|
||||||
|
|
||||||
REGRESS = repmgr_extension
|
REGRESS = repmgr_extension
|
||||||
|
|
||||||
@@ -53,9 +56,9 @@ $(info Building against PostgreSQL $(MAJORVERSION))
|
|||||||
|
|
||||||
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
REPMGR_CLIENT_OBJS = repmgr-client.o \
|
||||||
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \
|
||||||
repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o \
|
repmgr-action-cluster.o repmgr-action-node.o repmgr-action-service.o repmgr-action-daemon.o \
|
||||||
configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o
|
configfile.o configfile-scan.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o sysutils.o
|
||||||
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o
|
REPMGRD_OBJS = repmgrd.o repmgrd-physical.o configfile.o configfile-scan.o log.o dbutils.o strutil.o controldata.o compat.o sysutils.o
|
||||||
DATE=$(shell date "+%Y-%m-%d")
|
DATE=$(shell date "+%Y-%m-%d")
|
||||||
|
|
||||||
repmgr_version.h: repmgr_version.h.in
|
repmgr_version.h: repmgr_version.h.in
|
||||||
|
|||||||
28
README.md
28
README.md
@@ -7,32 +7,30 @@ replication capabilities with utilities to set up standby servers, monitor
|
|||||||
replication, and perform administrative tasks such as failover or switchover
|
replication, and perform administrative tasks such as failover or switchover
|
||||||
operations.
|
operations.
|
||||||
|
|
||||||
`repmgr 4` is a complete rewrite of the existing `repmgr` codebase, allowing
|
PostgreSQL 12, 11, 10, 9.6 and 9.5 are fully supported.
|
||||||
the use of all of the latest features in PostgreSQL replication.
|
|
||||||
|
|
||||||
PostgreSQL 11, 10, 9.6 and 9.5 are fully supported.
|
|
||||||
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
PostgreSQL 9.4 and 9.3 are supported, with some restrictions.
|
||||||
|
|
||||||
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
`repmgr` is distributed under the GNU GPL 3 and maintained by 2ndQuadrant.
|
||||||
|
|
||||||
### BDR support
|
|
||||||
|
|
||||||
`repmgr 4` supports monitoring of a two-node BDR 2.0 cluster on PostgreSQL 9.6
|
|
||||||
only. Note that BDR 2.0 is not publicly available; please contact 2ndQuadrant
|
|
||||||
for details.
|
|
||||||
|
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
The main `repmgr` documentation is available here:
|
The full `repmgr` documentation is available here:
|
||||||
|
|
||||||
> [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
> [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
||||||
|
|
||||||
The `README` file for `repmgr` 3.x is available here:
|
The old `README` file for `repmgr` 3.x is available here:
|
||||||
|
|
||||||
> https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/README.md
|
> https://github.com/2ndQuadrant/repmgr/blob/REL3_3_STABLE/README.md
|
||||||
|
|
||||||
|
Note that the `repmgr` 3.x series is no longer supported and contains known bugs;
|
||||||
|
please upgrade to the current `repmgr` version as soon as possible.
|
||||||
|
|
||||||
|
Versions
|
||||||
|
--------
|
||||||
|
|
||||||
|
For an overview of `repmgr` versions and PostgreSQL compatibility, see the
|
||||||
|
[repmgr compatibility matrix](https://repmgr.org/docs/current/install-requirements.html#INSTALL-COMPATIBILITY-MATRIX).
|
||||||
|
|
||||||
Files
|
Files
|
||||||
------
|
------
|
||||||
@@ -72,6 +70,8 @@ Please report bugs and other issues to:
|
|||||||
|
|
||||||
* https://github.com/2ndQuadrant/repmgr
|
* https://github.com/2ndQuadrant/repmgr
|
||||||
|
|
||||||
|
See
|
||||||
|
|
||||||
Further information is available at https://repmgr.org/
|
Further information is available at https://repmgr.org/
|
||||||
|
|
||||||
We'd love to hear from you about how you use repmgr. Case studies and
|
We'd love to hear from you about how you use repmgr. Case studies and
|
||||||
@@ -98,6 +98,8 @@ Further reading
|
|||||||
---------------
|
---------------
|
||||||
|
|
||||||
* [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
* [repmgr documentation](https://repmgr.org/docs/current/index.html)
|
||||||
|
* [How to Automate PostgreSQL 12 Replication and Failover with repmgr - Part 1](https://www.2ndquadrant.com/en/blog/how-to-automate-postgresql-12-replication-and-failover-with-repmgr-part-1/)
|
||||||
|
* [How to Automate PostgreSQL 12 Replication and Failover with repmgr - Part 2](https://www.2ndquadrant.com/en/blog/how-to-automate-postgresql-12-replication-and-failover-with-repmgr-part-2/)
|
||||||
* https://blog.2ndquadrant.com/repmgr-3-2-is-here-barman-support-brand-new-high-availability-features/
|
* https://blog.2ndquadrant.com/repmgr-3-2-is-here-barman-support-brand-new-high-availability-features/
|
||||||
* https://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
* https://blog.2ndquadrant.com/improvements-in-repmgr-3-1-4/
|
||||||
* https://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
* https://blog.2ndquadrant.com/managing-useful-clusters-repmgr/
|
||||||
|
|||||||
2
compat.c
2
compat.c
@@ -6,7 +6,7 @@
|
|||||||
* supported PostgreSQL versions. They're unlikely to change but
|
* supported PostgreSQL versions. They're unlikely to change but
|
||||||
* it would be worth keeping an eye on them for any fixes/improvements.
|
* it would be worth keeping an eye on them for any fixes/improvements.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
|||||||
2
compat.h
2
compat.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* compat.h
|
* compat.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
|||||||
82
configfile.c
82
configfile.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* config.c - parse repmgr.conf and other configuration-related functionality
|
* config.c - parse repmgr.conf and other configuration-related functionality
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -378,13 +378,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
*/
|
*/
|
||||||
options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;
|
options->witness_sync_interval = DEFAULT_WITNESS_SYNC_INTERVAL;
|
||||||
|
|
||||||
/*-------------
|
|
||||||
* BDR settings
|
|
||||||
*-------------
|
|
||||||
*/
|
|
||||||
options->bdr_local_monitoring_only = false;
|
|
||||||
options->bdr_recovery_timeout = DEFAULT_BDR_RECOVERY_TIMEOUT;
|
|
||||||
|
|
||||||
/*-------------------------
|
/*-------------------------
|
||||||
* service command settings
|
* service command settings
|
||||||
*-------------------------
|
*-------------------------
|
||||||
@@ -569,8 +562,6 @@ _parse_config(t_configuration_options *options, ItemList *error_list, ItemList *
|
|||||||
void
|
void
|
||||||
parse_configuration_item(t_configuration_options *options, ItemList *error_list, ItemList *warning_list, const char *name, const char *value)
|
parse_configuration_item(t_configuration_options *options, ItemList *error_list, ItemList *warning_list, const char *name, const char *value)
|
||||||
{
|
{
|
||||||
bool known_parameter = true;
|
|
||||||
|
|
||||||
if (strcmp(name, "node_id") == 0)
|
if (strcmp(name, "node_id") == 0)
|
||||||
{
|
{
|
||||||
options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID);
|
options->node_id = repmgr_atoi(value, name, error_list, MIN_NODE_ID);
|
||||||
@@ -616,10 +607,8 @@ parse_configuration_item(t_configuration_options *options, ItemList *error_list,
|
|||||||
{
|
{
|
||||||
if (strcmp(value, "physical") == 0)
|
if (strcmp(value, "physical") == 0)
|
||||||
options->replication_type = REPLICATION_TYPE_PHYSICAL;
|
options->replication_type = REPLICATION_TYPE_PHYSICAL;
|
||||||
else if (strcmp(value, "bdr") == 0)
|
|
||||||
options->replication_type = REPLICATION_TYPE_BDR;
|
|
||||||
else
|
else
|
||||||
item_list_append(error_list, _("value for \"replication_type\" must be \"physical\" or \"bdr\""));
|
item_list_append(error_list, _("value for \"replication_type\" must be \"physical\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* log settings */
|
/* log settings */
|
||||||
@@ -778,12 +767,6 @@ parse_configuration_item(t_configuration_options *options, ItemList *error_list,
|
|||||||
else if (strcmp(name, "witness_sync_interval") == 0)
|
else if (strcmp(name, "witness_sync_interval") == 0)
|
||||||
options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);
|
options->witness_sync_interval = repmgr_atoi(value, name, error_list, 1);
|
||||||
|
|
||||||
/* BDR settings */
|
|
||||||
else if (strcmp(name, "bdr_local_monitoring_only") == 0)
|
|
||||||
options->bdr_local_monitoring_only = parse_bool(value, name, error_list);
|
|
||||||
else if (strcmp(name, "bdr_recovery_timeout") == 0)
|
|
||||||
options->bdr_recovery_timeout = repmgr_atoi(value, name, error_list, 0);
|
|
||||||
|
|
||||||
/* service settings */
|
/* service settings */
|
||||||
else if (strcmp(name, "pg_ctl_options") == 0)
|
else if (strcmp(name, "pg_ctl_options") == 0)
|
||||||
strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options));
|
strncpy(options->pg_ctl_options, value, sizeof(options->pg_ctl_options));
|
||||||
@@ -841,73 +824,47 @@ parse_configuration_item(t_configuration_options *options, ItemList *error_list,
|
|||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"cluster\" is deprecated and will be ignored"));
|
_("parameter \"cluster\" is deprecated and will be ignored"));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "node") == 0)
|
else if (strcmp(name, "node") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"node\" has been renamed to \"node_id\""));
|
_("parameter \"node\" has been renamed to \"node_id\""));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "upstream_node") == 0)
|
else if (strcmp(name, "upstream_node") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"upstream_node\" has been removed; use \"--upstream-node-id\" when cloning a standby"));
|
_("parameter \"upstream_node\" has been removed; use \"--upstream-node-id\" when cloning a standby"));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "loglevel") == 0)
|
else if (strcmp(name, "loglevel") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"loglevel\" has been renamed to \"log_level\""));
|
_("parameter \"loglevel\" has been renamed to \"log_level\""));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "logfacility") == 0)
|
else if (strcmp(name, "logfacility") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"logfacility\" has been renamed to \"log_facility\""));
|
_("parameter \"logfacility\" has been renamed to \"log_facility\""));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "logfile") == 0)
|
else if (strcmp(name, "logfile") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"logfile\" has been renamed to \"log_file\""));
|
_("parameter \"logfile\" has been renamed to \"log_file\""));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "master_reponse_timeout") == 0)
|
else if (strcmp(name, "master_reponse_timeout") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"master_reponse_timeout\" has been removed; use \"async_query_timeout\" instead"));
|
_("parameter \"master_reponse_timeout\" has been removed; use \"async_query_timeout\" instead"));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
else if (strcmp(name, "retry_promote_interval_secs") == 0)
|
||||||
{
|
{
|
||||||
item_list_append(warning_list,
|
item_list_append(warning_list,
|
||||||
_("parameter \"retry_promote_interval_secs\" has been removed; use \"primary_notification_timeout\" instead"));
|
_("parameter \"retry_promote_interval_secs\" has been removed; use \"primary_notification_timeout\" instead"));
|
||||||
known_parameter = false;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
known_parameter = false;
|
|
||||||
log_warning(_("%s/%s: unknown name/value pair provided; ignoring"), name, value);
|
log_warning(_("%s/%s: unknown name/value pair provided; ignoring"), name, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Raise an error if a known parameter is provided with an empty
|
|
||||||
* value. Currently there's no reason why empty parameters are needed;
|
|
||||||
* if we want to accept those, we'd need to add stricter default
|
|
||||||
* checking, as currently e.g. an empty `node_id` value will be converted
|
|
||||||
* to '0'.
|
|
||||||
*/
|
|
||||||
if (known_parameter == true && !strlen(value))
|
|
||||||
{
|
|
||||||
char error_message_buf[MAXLEN] = "";
|
|
||||||
|
|
||||||
maxlen_snprintf(error_message_buf,
|
|
||||||
_("\"%s\": no value provided"),
|
|
||||||
name);
|
|
||||||
|
|
||||||
item_list_append(error_list, error_message_buf);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1112,8 +1069,6 @@ parse_time_unit_parameter(const char *name, const char *value, char *dest, ItemL
|
|||||||
* with these):
|
* with these):
|
||||||
*
|
*
|
||||||
* - async_query_timeout
|
* - async_query_timeout
|
||||||
* - bdr_local_monitoring_only
|
|
||||||
* - bdr_recovery_timeout
|
|
||||||
* - child_nodes_check_interval
|
* - child_nodes_check_interval
|
||||||
* - child_nodes_connected_min_count
|
* - child_nodes_connected_min_count
|
||||||
* - child_nodes_connected_include_witness
|
* - child_nodes_connected_include_witness
|
||||||
@@ -1180,7 +1135,8 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
_parse_config(&new_options, &config_errors, &config_warnings);
|
_parse_config(&new_options, &config_errors, &config_warnings);
|
||||||
|
|
||||||
|
|
||||||
if (server_type == PRIMARY || server_type == STANDBY)
|
if (new_options.failover == FAILOVER_AUTOMATIC
|
||||||
|
&& (server_type == PRIMARY || server_type == STANDBY))
|
||||||
{
|
{
|
||||||
if (new_options.promote_command[0] == '\0')
|
if (new_options.promote_command[0] == '\0')
|
||||||
{
|
{
|
||||||
@@ -1249,24 +1205,6 @@ reload_config(t_configuration_options *orig_options, t_server_type server_type)
|
|||||||
config_changed = true;
|
config_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* bdr_local_monitoring_only */
|
|
||||||
if (orig_options->bdr_local_monitoring_only != new_options.bdr_local_monitoring_only)
|
|
||||||
{
|
|
||||||
orig_options->bdr_local_monitoring_only = new_options.bdr_local_monitoring_only;
|
|
||||||
log_info(_("\"bdr_local_monitoring_only\" is now \"%s\""), new_options.bdr_local_monitoring_only == true ? "TRUE" : "FALSE");
|
|
||||||
|
|
||||||
config_changed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* bdr_recovery_timeout */
|
|
||||||
if (orig_options->bdr_recovery_timeout != new_options.bdr_recovery_timeout)
|
|
||||||
{
|
|
||||||
orig_options->bdr_recovery_timeout = new_options.bdr_recovery_timeout;
|
|
||||||
log_info(_("\"bdr_recovery_timeout\" is now \"%i\""), new_options.bdr_recovery_timeout);
|
|
||||||
|
|
||||||
config_changed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* child_nodes_check_interval */
|
/* child_nodes_check_interval */
|
||||||
if (orig_options->child_nodes_check_interval != new_options.child_nodes_check_interval)
|
if (orig_options->child_nodes_check_interval != new_options.child_nodes_check_interval)
|
||||||
{
|
{
|
||||||
@@ -1882,6 +1820,9 @@ modify_auto_conf(const char *data_dir, KeyValueList *items)
|
|||||||
PQExpBufferData auto_conf_contents;
|
PQExpBufferData auto_conf_contents;
|
||||||
|
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
mode_t um;
|
||||||
|
struct stat auto_conf_st;
|
||||||
|
|
||||||
KeyValueList config = {NULL, NULL};
|
KeyValueList config = {NULL, NULL};
|
||||||
KeyValueListCell *cell = NULL;
|
KeyValueListCell *cell = NULL;
|
||||||
|
|
||||||
@@ -1937,7 +1878,16 @@ modify_auto_conf(const char *data_dir, KeyValueList *items)
|
|||||||
cell->key, cell->value);
|
cell->key, cell->value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
stat(auto_conf.data, &auto_conf_st);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set umask so the temporary file is created in the same mode as the original
|
||||||
|
* postgresql.auto.conf file.
|
||||||
|
*/
|
||||||
|
um = umask(~(auto_conf_st.st_mode));
|
||||||
fp = fopen(auto_conf_tmp.data, "w");
|
fp = fopen(auto_conf_tmp.data, "w");
|
||||||
|
umask(um);
|
||||||
|
|
||||||
if (fp == NULL)
|
if (fp == NULL)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "unable to open \"%s\": %s\n",
|
fprintf(stderr, "unable to open \"%s\": %s\n",
|
||||||
|
|||||||
14
configfile.h
14
configfile.h
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* configfile.h
|
* configfile.h
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
@@ -29,7 +29,7 @@
|
|||||||
#define TARGET_TIMELINE_LATEST 0
|
#define TARGET_TIMELINE_LATEST 0
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is defined src/include/utils.h, however it's not practical
|
* This is defined in src/include/utils.h, however it's not practical
|
||||||
* to include that from a frontend application.
|
* to include that from a frontend application.
|
||||||
*/
|
*/
|
||||||
#define PG_AUTOCONF_FILENAME "postgresql.auto.conf"
|
#define PG_AUTOCONF_FILENAME "postgresql.auto.conf"
|
||||||
@@ -161,10 +161,6 @@ typedef struct
|
|||||||
int child_nodes_disconnect_timeout;
|
int child_nodes_disconnect_timeout;
|
||||||
char child_nodes_disconnect_command[MAXPGPATH];
|
char child_nodes_disconnect_command[MAXPGPATH];
|
||||||
|
|
||||||
/* BDR settings */
|
|
||||||
bool bdr_local_monitoring_only;
|
|
||||||
bool bdr_recovery_timeout;
|
|
||||||
|
|
||||||
/* service settings */
|
/* service settings */
|
||||||
char pg_ctl_options[MAXLEN];
|
char pg_ctl_options[MAXLEN];
|
||||||
char service_start_command[MAXPGPATH];
|
char service_start_command[MAXPGPATH];
|
||||||
@@ -193,6 +189,7 @@ typedef struct
|
|||||||
|
|
||||||
/* undocumented test settings */
|
/* undocumented test settings */
|
||||||
int promote_delay;
|
int promote_delay;
|
||||||
|
int failover_delay;
|
||||||
} t_configuration_options;
|
} t_configuration_options;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -238,8 +235,6 @@ typedef struct
|
|||||||
DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT, \
|
DEFAULT_CHILD_NODES_CONNECTED_MIN_COUNT, \
|
||||||
DEFAULT_CHILD_NODES_CONNECTED_INCLUDE_WITNESS, \
|
DEFAULT_CHILD_NODES_CONNECTED_INCLUDE_WITNESS, \
|
||||||
DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT, "", \
|
DEFAULT_CHILD_NODES_DISCONNECT_TIMEOUT, "", \
|
||||||
/* BDR settings */ \
|
|
||||||
false, DEFAULT_BDR_RECOVERY_TIMEOUT, \
|
|
||||||
/* service settings */ \
|
/* service settings */ \
|
||||||
"", "", "", "", "", "", \
|
"", "", "", "", "", "", \
|
||||||
/* repmgrd service settings */ \
|
/* repmgrd service settings */ \
|
||||||
@@ -249,8 +244,9 @@ typedef struct
|
|||||||
/* barman settings */ \
|
/* barman settings */ \
|
||||||
"", "", "", \
|
"", "", "", \
|
||||||
/* rsync/ssh settings */ \
|
/* rsync/ssh settings */ \
|
||||||
"", "", \
|
"", "", \
|
||||||
/* undocumented test settings */ \
|
/* undocumented test settings */ \
|
||||||
|
0, \
|
||||||
0 \
|
0 \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
22
configure
vendored
22
configure
vendored
@@ -1,6 +1,6 @@
|
|||||||
#! /bin/sh
|
#! /bin/sh
|
||||||
# Guess values for system-dependent variables and create Makefiles.
|
# Guess values for system-dependent variables and create Makefiles.
|
||||||
# Generated by GNU Autoconf 2.69 for repmgr 5.0.
|
# Generated by GNU Autoconf 2.69 for repmgr 5.1.0.
|
||||||
#
|
#
|
||||||
# Report bugs to <repmgr@googlegroups.com>.
|
# Report bugs to <repmgr@googlegroups.com>.
|
||||||
#
|
#
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
# This configure script is free software; the Free Software Foundation
|
# This configure script is free software; the Free Software Foundation
|
||||||
# gives unlimited permission to copy, distribute and modify it.
|
# gives unlimited permission to copy, distribute and modify it.
|
||||||
#
|
#
|
||||||
# Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
# Copyright (c) 2010-2020, 2ndQuadrant Ltd.
|
||||||
## -------------------- ##
|
## -------------------- ##
|
||||||
## M4sh Initialization. ##
|
## M4sh Initialization. ##
|
||||||
## -------------------- ##
|
## -------------------- ##
|
||||||
@@ -582,8 +582,8 @@ MAKEFLAGS=
|
|||||||
# Identity of this package.
|
# Identity of this package.
|
||||||
PACKAGE_NAME='repmgr'
|
PACKAGE_NAME='repmgr'
|
||||||
PACKAGE_TARNAME='repmgr'
|
PACKAGE_TARNAME='repmgr'
|
||||||
PACKAGE_VERSION='5.0'
|
PACKAGE_VERSION='5.1.0'
|
||||||
PACKAGE_STRING='repmgr 5.0'
|
PACKAGE_STRING='repmgr 5.1.0'
|
||||||
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
PACKAGE_BUGREPORT='repmgr@googlegroups.com'
|
||||||
PACKAGE_URL='https://repmgr.org/'
|
PACKAGE_URL='https://repmgr.org/'
|
||||||
|
|
||||||
@@ -1181,7 +1181,7 @@ if test "$ac_init_help" = "long"; then
|
|||||||
# Omit some internal or obsolete options to make the list less imposing.
|
# Omit some internal or obsolete options to make the list less imposing.
|
||||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||||
cat <<_ACEOF
|
cat <<_ACEOF
|
||||||
\`configure' configures repmgr 5.0 to adapt to many kinds of systems.
|
\`configure' configures repmgr 5.1.0 to adapt to many kinds of systems.
|
||||||
|
|
||||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||||
|
|
||||||
@@ -1242,7 +1242,7 @@ fi
|
|||||||
|
|
||||||
if test -n "$ac_init_help"; then
|
if test -n "$ac_init_help"; then
|
||||||
case $ac_init_help in
|
case $ac_init_help in
|
||||||
short | recursive ) echo "Configuration of repmgr 5.0:";;
|
short | recursive ) echo "Configuration of repmgr 5.1.0:";;
|
||||||
esac
|
esac
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
|
|
||||||
@@ -1316,14 +1316,14 @@ fi
|
|||||||
test -n "$ac_init_help" && exit $ac_status
|
test -n "$ac_init_help" && exit $ac_status
|
||||||
if $ac_init_version; then
|
if $ac_init_version; then
|
||||||
cat <<\_ACEOF
|
cat <<\_ACEOF
|
||||||
repmgr configure 5.0
|
repmgr configure 5.1.0
|
||||||
generated by GNU Autoconf 2.69
|
generated by GNU Autoconf 2.69
|
||||||
|
|
||||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||||
This configure script is free software; the Free Software Foundation
|
This configure script is free software; the Free Software Foundation
|
||||||
gives unlimited permission to copy, distribute and modify it.
|
gives unlimited permission to copy, distribute and modify it.
|
||||||
|
|
||||||
Copyright (c) 2010-2019, 2ndQuadrant Ltd.
|
Copyright (c) 2010-2020, 2ndQuadrant Ltd.
|
||||||
_ACEOF
|
_ACEOF
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
@@ -1335,7 +1335,7 @@ cat >config.log <<_ACEOF
|
|||||||
This file contains any messages produced by compilers while
|
This file contains any messages produced by compilers while
|
||||||
running configure, to aid debugging if configure makes a mistake.
|
running configure, to aid debugging if configure makes a mistake.
|
||||||
|
|
||||||
It was created by repmgr $as_me 5.0, which was
|
It was created by repmgr $as_me 5.1.0, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
$ $0 $@
|
$ $0 $@
|
||||||
@@ -2487,7 +2487,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
|||||||
# report actual input values of CONFIG_FILES etc. instead of their
|
# report actual input values of CONFIG_FILES etc. instead of their
|
||||||
# values after options handling.
|
# values after options handling.
|
||||||
ac_log="
|
ac_log="
|
||||||
This file was extended by repmgr $as_me 5.0, which was
|
This file was extended by repmgr $as_me 5.1.0, which was
|
||||||
generated by GNU Autoconf 2.69. Invocation command line was
|
generated by GNU Autoconf 2.69. Invocation command line was
|
||||||
|
|
||||||
CONFIG_FILES = $CONFIG_FILES
|
CONFIG_FILES = $CONFIG_FILES
|
||||||
@@ -2550,7 +2550,7 @@ _ACEOF
|
|||||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||||
ac_cs_version="\\
|
ac_cs_version="\\
|
||||||
repmgr config.status 5.0
|
repmgr config.status 5.1.0
|
||||||
configured by $0, generated by GNU Autoconf 2.69,
|
configured by $0, generated by GNU Autoconf 2.69,
|
||||||
with options \\"\$ac_cs_config\\"
|
with options \\"\$ac_cs_config\\"
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
AC_INIT([repmgr], [5.0], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
AC_INIT([repmgr], [5.1.0], [repmgr@googlegroups.com], [repmgr], [https://repmgr.org/])
|
||||||
|
|
||||||
AC_COPYRIGHT([Copyright (c) 2010-2019, 2ndQuadrant Ltd.])
|
AC_COPYRIGHT([Copyright (c) 2010-2020, 2ndQuadrant Ltd.])
|
||||||
|
|
||||||
AC_CONFIG_HEADER(config.h)
|
AC_CONFIG_HEADER(config.h)
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
* running. For that reason we can't rely on the pg_control_*() functions
|
* running. For that reason we can't rely on the pg_control_*() functions
|
||||||
* provided in PostgreSQL 9.6 and later.
|
* provided in PostgreSQL 9.6 and later.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* controldata.h
|
* controldata.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
|||||||
119
dbutils.h
119
dbutils.h
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* dbutils.h
|
* dbutils.h
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -60,11 +60,6 @@
|
|||||||
"NULL AS attached "
|
"NULL AS attached "
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define BDR2_NODES_COLUMNS "node_sysid, node_timeline, node_dboid, node_name, node_local_dsn, ''"
|
|
||||||
#define BDR3_NODES_COLUMNS "ns.node_id, 0, 0, ns.node_name, ns.interface_connstr, ns.peer_state_name"
|
|
||||||
|
|
||||||
|
|
||||||
#define ERRBUFF_SIZE 512
|
#define ERRBUFF_SIZE 512
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
@@ -72,8 +67,7 @@ typedef enum
|
|||||||
UNKNOWN = 0,
|
UNKNOWN = 0,
|
||||||
PRIMARY,
|
PRIMARY,
|
||||||
STANDBY,
|
STANDBY,
|
||||||
WITNESS,
|
WITNESS
|
||||||
BDR
|
|
||||||
} t_server_type;
|
} t_server_type;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
@@ -125,14 +119,21 @@ typedef enum
|
|||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
/* unable to query "pg_stat_replication" or other error */
|
||||||
NODE_ATTACHED_UNKNOWN = -1,
|
NODE_ATTACHED_UNKNOWN = -1,
|
||||||
NODE_DETACHED,
|
/* node has record in "pg_stat_replication" and state is "streaming" */
|
||||||
NODE_ATTACHED
|
NODE_ATTACHED,
|
||||||
|
/* node has record in "pg_stat_replication" but state is not "streaming" */
|
||||||
|
NODE_NOT_ATTACHED,
|
||||||
|
/* node has no record in "pg_stat_replication" */
|
||||||
|
NODE_DETACHED
|
||||||
} NodeAttached;
|
} NodeAttached;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
SLOT_UNKNOWN = -1,
|
SLOT_UNKNOWN = -1,
|
||||||
|
SLOT_NOT_FOUND,
|
||||||
|
SLOT_NOT_PHYSICAL,
|
||||||
SLOT_INACTIVE,
|
SLOT_INACTIVE,
|
||||||
SLOT_ACTIVE
|
SLOT_ACTIVE
|
||||||
} ReplSlotStatus;
|
} ReplSlotStatus;
|
||||||
@@ -170,6 +171,7 @@ typedef struct
|
|||||||
char current_timestamp[MAXLEN];
|
char current_timestamp[MAXLEN];
|
||||||
bool in_recovery;
|
bool in_recovery;
|
||||||
TimeLineID timeline_id;
|
TimeLineID timeline_id;
|
||||||
|
char timeline_id_str[MAXLEN];
|
||||||
XLogRecPtr last_wal_receive_lsn;
|
XLogRecPtr last_wal_receive_lsn;
|
||||||
XLogRecPtr last_wal_replay_lsn;
|
XLogRecPtr last_wal_replay_lsn;
|
||||||
char last_xact_replay_timestamp[MAXLEN];
|
char last_xact_replay_timestamp[MAXLEN];
|
||||||
@@ -324,45 +326,6 @@ typedef struct s_connection_user
|
|||||||
#define T_CONNECTION_USER_INITIALIZER { "", false }
|
#define T_CONNECTION_USER_INITIALIZER { "", false }
|
||||||
|
|
||||||
|
|
||||||
/* represents an entry in bdr.bdr_nodes */
|
|
||||||
typedef struct s_bdr_node_info
|
|
||||||
{
|
|
||||||
char node_sysid[MAXLEN];
|
|
||||||
uint32 node_timeline;
|
|
||||||
uint32 node_dboid;
|
|
||||||
char node_name[MAXLEN];
|
|
||||||
char node_local_dsn[MAXLEN];
|
|
||||||
char peer_state_name[MAXLEN];
|
|
||||||
} t_bdr_node_info;
|
|
||||||
|
|
||||||
#define T_BDR_NODE_INFO_INITIALIZER { \
|
|
||||||
"", InvalidOid, InvalidOid, \
|
|
||||||
"", "", "" \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* structs to store a list of BDR node records */
|
|
||||||
typedef struct BdrNodeInfoListCell
|
|
||||||
{
|
|
||||||
struct BdrNodeInfoListCell *next;
|
|
||||||
t_bdr_node_info *node_info;
|
|
||||||
} BdrNodeInfoListCell;
|
|
||||||
|
|
||||||
typedef struct BdrNodeInfoList
|
|
||||||
{
|
|
||||||
BdrNodeInfoListCell *head;
|
|
||||||
BdrNodeInfoListCell *tail;
|
|
||||||
int node_count;
|
|
||||||
} BdrNodeInfoList;
|
|
||||||
|
|
||||||
#define T_BDR_NODE_INFO_LIST_INITIALIZER { \
|
|
||||||
NULL, \
|
|
||||||
NULL, \
|
|
||||||
0 \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
char filepath[MAXPGPATH];
|
char filepath[MAXPGPATH];
|
||||||
@@ -372,6 +335,7 @@ typedef struct
|
|||||||
|
|
||||||
#define T_CONFIGFILE_INFO_INITIALIZER { "", "", false }
|
#define T_CONFIGFILE_INFO_INITIALIZER { "", "", false }
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
int size;
|
int size;
|
||||||
@@ -381,6 +345,7 @@ typedef struct
|
|||||||
|
|
||||||
#define T_CONFIGFILE_LIST_INITIALIZER { 0, 0, NULL }
|
#define T_CONFIGFILE_LIST_INITIALIZER { 0, 0, NULL }
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
uint64 system_identifier;
|
uint64 system_identifier;
|
||||||
@@ -420,10 +385,6 @@ typedef struct RepmgrdInfo {
|
|||||||
/* utility functions */
|
/* utility functions */
|
||||||
|
|
||||||
XLogRecPtr parse_lsn(const char *str);
|
XLogRecPtr parse_lsn(const char *str);
|
||||||
|
|
||||||
extern void
|
|
||||||
wrap_ddl_query(PQExpBufferData *query_buf, int replication_type, const char *fmt,...)
|
|
||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
|
|
||||||
bool atobool(const char *value);
|
bool atobool(const char *value);
|
||||||
|
|
||||||
/* connection functions */
|
/* connection functions */
|
||||||
@@ -432,13 +393,19 @@ PGconn *establish_db_connection(const char *conninfo,
|
|||||||
PGconn *establish_db_connection_quiet(const char *conninfo);
|
PGconn *establish_db_connection_quiet(const char *conninfo);
|
||||||
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
PGconn *establish_db_connection_by_params(t_conninfo_param_list *param_list,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
|
PGconn *establish_db_connection_with_replacement_param(const char *conninfo,
|
||||||
|
const char *param,
|
||||||
|
const char *value,
|
||||||
|
const bool exit_on_error);
|
||||||
|
PGconn *establish_replication_connection_from_conn(PGconn *conn, const char *repluser);
|
||||||
|
PGconn *establish_replication_connection_from_conninfo(const char *conninfo, const char *repluser);
|
||||||
|
|
||||||
PGconn *establish_primary_db_connection(PGconn *conn,
|
PGconn *establish_primary_db_connection(PGconn *conn,
|
||||||
const bool exit_on_error);
|
const bool exit_on_error);
|
||||||
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||||
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out);
|
||||||
|
PGconn *duplicate_connection(PGconn *conn, const char *user, bool replication);
|
||||||
|
|
||||||
bool is_superuser_connection(PGconn *conn, t_connection_user *userinfo);
|
|
||||||
bool connection_has_pg_settings(PGconn *conn);
|
|
||||||
void close_connection(PGconn **conn);
|
void close_connection(PGconn **conn);
|
||||||
|
|
||||||
/* conninfo manipulation functions */
|
/* conninfo manipulation functions */
|
||||||
@@ -467,6 +434,7 @@ bool set_config(PGconn *conn, const char *config_param, const char *config_valu
|
|||||||
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||||
int guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
|
int guc_set(PGconn *conn, const char *parameter, const char *op, const char *value);
|
||||||
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
bool get_pg_setting(PGconn *conn, const char *setting, char *output);
|
||||||
|
bool get_pg_setting_bool(PGconn *conn, const char *setting, bool *output);
|
||||||
bool get_pg_setting_int(PGconn *conn, const char *setting, int *output);
|
bool get_pg_setting_int(PGconn *conn, const char *setting, int *output);
|
||||||
bool alter_system_int(PGconn *conn, const char *name, int value);
|
bool alter_system_int(PGconn *conn, const char *name, int value);
|
||||||
bool pg_reload_conf(PGconn *conn);
|
bool pg_reload_conf(PGconn *conn);
|
||||||
@@ -482,6 +450,12 @@ bool identify_system(PGconn *repl_conn, t_system_identification *identification
|
|||||||
uint64 system_identifier(PGconn *conn);
|
uint64 system_identifier(PGconn *conn);
|
||||||
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
TimeLineHistoryEntry *get_timeline_history(PGconn *repl_conn, TimeLineID tli);
|
||||||
|
|
||||||
|
/* user/role information functions */
|
||||||
|
bool can_execute_pg_promote(PGconn *conn);
|
||||||
|
bool connection_has_pg_settings(PGconn *conn);
|
||||||
|
bool is_replication_role(PGconn *conn, char *rolname);
|
||||||
|
bool is_superuser_connection(PGconn *conn, t_connection_user *userinfo);
|
||||||
|
|
||||||
/* repmgrd shared memory functions */
|
/* repmgrd shared memory functions */
|
||||||
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id);
|
||||||
int repmgrd_get_local_node_id(PGconn *conn);
|
int repmgrd_get_local_node_id(PGconn *conn);
|
||||||
@@ -559,10 +533,14 @@ PGresult *get_event_records(PGconn *conn, int node_id, const char *node_name,
|
|||||||
|
|
||||||
/* replication slot functions */
|
/* replication slot functions */
|
||||||
void create_slot_name(char *slot_name, int node_id);
|
void create_slot_name(char *slot_name, int node_id);
|
||||||
bool create_replication_slot(PGconn *conn, char *slot_name, PQExpBufferData *error_msg);
|
|
||||||
bool drop_replication_slot(PGconn *conn, char *slot_name);
|
bool create_replication_slot_sql(PGconn *conn, char *slot_name, PQExpBufferData *error_msg);
|
||||||
|
bool create_replication_slot_replprot(PGconn *conn, PGconn *repl_conn, char *slot_name, PQExpBufferData *error_msg);
|
||||||
|
bool drop_replication_slot_sql(PGconn *conn, char *slot_name);
|
||||||
|
bool drop_replication_slot_replprot(PGconn *repl_conn, char *slot_name);
|
||||||
|
|
||||||
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
RecordStatus get_slot_record(PGconn *conn, char *slot_name, t_replication_slot *record);
|
||||||
int get_free_replication_slot_count(PGconn *conn);
|
int get_free_replication_slot_count(PGconn *conn, int *max_replication_slots);
|
||||||
int get_inactive_replication_slots(PGconn *conn, KeyValueList *list);
|
int get_inactive_replication_slots(PGconn *conn, KeyValueList *list);
|
||||||
|
|
||||||
/* tablespace functions */
|
/* tablespace functions */
|
||||||
@@ -614,35 +592,14 @@ XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
|||||||
void init_replication_info(ReplInfo *replication_info);
|
void init_replication_info(ReplInfo *replication_info);
|
||||||
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
bool get_replication_info(PGconn *conn, t_server_type node_type, ReplInfo *replication_info);
|
||||||
int get_replication_lag_seconds(PGconn *conn);
|
int get_replication_lag_seconds(PGconn *conn);
|
||||||
TimeLineID get_node_timeline(PGconn *conn);
|
TimeLineID get_node_timeline(PGconn *conn, char *timeline_id_str);
|
||||||
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
void get_node_replication_stats(PGconn *conn, t_node_info *node_info);
|
||||||
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name);
|
NodeAttached is_downstream_node_attached(PGconn *conn, char *node_name, char **node_state);
|
||||||
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
void set_upstream_last_seen(PGconn *conn, int upstream_node_id);
|
||||||
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
int get_upstream_last_seen(PGconn *conn, t_server_type node_type);
|
||||||
|
|
||||||
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
bool is_wal_replay_paused(PGconn *conn, bool check_pending_wal);
|
||||||
|
|
||||||
/* BDR functions */
|
|
||||||
int get_bdr_version_num(void);
|
|
||||||
void get_all_bdr_node_records(PGconn *conn, BdrNodeInfoList *node_list);
|
|
||||||
RecordStatus get_bdr_node_record_by_name(PGconn *conn, const char *node_name, t_bdr_node_info *node_info);
|
|
||||||
bool is_bdr_db(PGconn *conn, PQExpBufferData *output);
|
|
||||||
bool is_bdr_db_quiet(PGconn *conn);
|
|
||||||
bool is_active_bdr_node(PGconn *conn, const char *node_name);
|
|
||||||
bool is_bdr_repmgr(PGconn *conn);
|
|
||||||
char *get_default_bdr_replication_set(PGconn *conn);
|
|
||||||
bool is_table_in_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
|
|
||||||
bool add_table_to_bdr_replication_set(PGconn *conn, const char *tablename, const char *set);
|
|
||||||
void add_extension_tables_to_bdr_replication_set(PGconn *conn);
|
|
||||||
bool bdr_node_name_matches(PGconn *conn, const char *node_name, PQExpBufferData *bdr_local_node_name);
|
|
||||||
ReplSlotStatus get_bdr_node_replication_slot_status(PGconn *conn, const char *node_name);
|
|
||||||
void get_bdr_other_node_name(PGconn *conn, int node_id, char *name_buf);
|
|
||||||
|
|
||||||
bool am_bdr_failover_handler(PGconn *conn, int node_id);
|
|
||||||
void unset_bdr_failover_handler(PGconn *conn);
|
|
||||||
bool bdr_node_has_repmgr_set(PGconn *conn, const char *node_name);
|
|
||||||
bool bdr_node_set_repmgr_set(PGconn *conn, const char *node_name);
|
|
||||||
|
|
||||||
/* miscellaneous debugging functions */
|
/* miscellaneous debugging functions */
|
||||||
const char *print_node_status(NodeStatus node_status);
|
const char *print_node_status(NodeStatus node_status);
|
||||||
const char *print_pqping_status(PGPing ping_status);
|
const char *print_pqping_status(PGPing ping_status);
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
* dirmod.c
|
* dirmod.c
|
||||||
* directory handling functions
|
* directory handling functions
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* dirutil.h
|
* dirutil.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -95,6 +95,7 @@ clean:
|
|||||||
rm -f repmgr.html
|
rm -f repmgr.html
|
||||||
rm -f repmgr-A4.pdf
|
rm -f repmgr-A4.pdf
|
||||||
rm -f repmgr-US.pdf
|
rm -f repmgr-US.pdf
|
||||||
|
rm -f html/*
|
||||||
|
|
||||||
maintainer-clean:
|
maintainer-clean:
|
||||||
rm -rf html
|
rm -rf html
|
||||||
|
|||||||
@@ -245,7 +245,7 @@
|
|||||||
<para>
|
<para>
|
||||||
For a standby which has been manually cloned or recovered from an external
|
For a standby which has been manually cloned or recovered from an external
|
||||||
backup manager such as Barman, the command
|
backup manager such as Barman, the command
|
||||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>
|
<command><link linkend="repmgr-standby-clone">repmgr standby clone --replication-conf-only</link></command>
|
||||||
can be used to create the correct <filename>recovery.conf</filename> file for
|
can be used to create the correct <filename>recovery.conf</filename> file for
|
||||||
use with &repmgr; (and will create a replication slot if required). Once this has been done,
|
use with &repmgr; (and will create a replication slot if required). Once this has been done,
|
||||||
<link linkend="repmgr-standby-register">register the node</link> as usual.
|
<link linkend="repmgr-standby-register">register the node</link> as usual.
|
||||||
|
|||||||
@@ -253,20 +253,14 @@
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; <literal>.deb</literal> packages are provided via the
|
&repmgr; <literal>.deb</literal> packages are provided by 2ndQuadrant as well as via the
|
||||||
PostgreSQL Community APT repository, and are available for each community-supported
|
PostgreSQL Community APT repository, and are available for each community-supported
|
||||||
PostgreSQL version, currently supported Debian releases, and currently supported
|
PostgreSQL version, currently supported Debian releases, and currently supported
|
||||||
Ubuntu LTS releases.
|
Ubuntu LTS releases.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<sect2 id="packages-apt-repository">
|
<sect2 id="packages-apt-repository">
|
||||||
<title>APT repository</title>
|
<title>APT repositories</title>
|
||||||
|
|
||||||
<para>
|
|
||||||
&repmgr; packages are available from the PostgreSQL Community APT repository,
|
|
||||||
which is updated immediately after each &repmgr; release.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
|
|
||||||
<table id="apt-2ndquadrant-repository">
|
<table id="apt-2ndquadrant-repository">
|
||||||
<title>2ndQuadrant public repository</title>
|
<title>2ndQuadrant public repository</title>
|
||||||
@@ -291,7 +285,7 @@
|
|||||||
<tbody>
|
<tbody>
|
||||||
<row>
|
<row>
|
||||||
<entry>Repository URL:</entry>
|
<entry>Repository URL:</entry>
|
||||||
<entry><ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</ulink></entry>
|
<entry><ulink url="https://apt.postgresql.org/">https://apt.postgresql.org/</ulink></entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry>Repository documentation:</entry>
|
<entry>Repository documentation:</entry>
|
||||||
@@ -382,9 +376,9 @@
|
|||||||
</table>
|
</table>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
Instead of using the <application>systemd</application> service command directly,
|
When using Debian packages, instead of using the <application>systemd</application> service
|
||||||
it's recommended to execute <command>pg_ctlcluster</command> (as <literal>root</literal>,
|
command directly, it's recommended to execute <command>pg_ctlcluster</command>
|
||||||
either directly or via <command>sudo</command>), e.g.:
|
(as <literal>root</literal>, either directly or via <command>sudo</command>), e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
<command>pg_ctlcluster 11 main [start|stop|restart|reload]</command></programlisting>
|
<command>pg_ctlcluster 11 main [start|stop|restart|reload]</command></programlisting>
|
||||||
</para>
|
</para>
|
||||||
@@ -477,7 +471,7 @@ repmgr96-4.1.1-0.0git320.g5113ab0.1.el7.x86_64.rpm</programlisting>
|
|||||||
<title>Debian/Ubuntu</title>
|
<title>Debian/Ubuntu</title>
|
||||||
<para>
|
<para>
|
||||||
An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
|
An archive of old packages (<literal>3.3.2</literal> and later) for Debian/Ubuntu-based systems is available here:
|
||||||
<ulink url="http://atalia.postgresql.org/morgue/r/repmgr/">http://atalia.postgresql.org/morgue/r/repmgr/</ulink>
|
<ulink url="https://apt-archive.postgresql.org/">https://apt-archive.postgresql.org/</ulink>
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|||||||
@@ -17,9 +17,202 @@
|
|||||||
|
|
||||||
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
<!-- remember to update the release date in ../repmgr_version.h.in -->
|
||||||
|
|
||||||
|
<sect1 id="release-5.1.0">
|
||||||
|
<title id="release-current">Release 5.1.0</title>
|
||||||
|
<para><emphasis>Mon 13 April, 2020</emphasis></para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
&repmgr; 5.1.0 is a major release.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For details on how to upgrade an existing &repmgr; installation, see
|
||||||
|
documentation section <link linkend="upgrading-major-version">Upgrading a major version release</link>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If &repmgrd; is in use, a PostgreSQL restart <emphasis>is</emphasis> required;
|
||||||
|
in that case we suggest combining this &repmgr; upgrade with the next PostgreSQL
|
||||||
|
minor release, which will require a PostgreSQL restart in any case.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Compatibility changes</title>
|
||||||
|
<para>
|
||||||
|
The <link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>
|
||||||
|
<option>--recovery-conf-only</option> option has been renamed to
|
||||||
|
<option>--replication-conf-only</option>. <option>--recovery-conf-only</option> will
|
||||||
|
still be accepted as an alias.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>General improvements</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
The requirement that the &repmgr; user is a database superuser has been
|
||||||
|
removed as far as possible.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In theory, &repmgr; can be operated with a normal database user for managing
|
||||||
|
the &repmgr; database, and a separate replication user for managing replication
|
||||||
|
connections (and replication slots, if these are in use).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Some operations will still require superuser permissions, e.g. for issuing
|
||||||
|
a <command>CHECKPOINT</command> as part of a switchover operation; in this case
|
||||||
|
a valid superuser should be provided with the <option>-S</option>/<option>--superuser</option>
|
||||||
|
option.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-clone"><command>repmgr standby clone</command></link>:
|
||||||
|
Warn if neither data page checksums nor <option>wal_log_hints</option> are active,
|
||||||
|
as this will preclude later usage of <command>pg_rewind</command>.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>:
|
||||||
|
when executed with <option>--dry-run</option>, the method which would be used to promote the node
|
||||||
|
will be displayed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link>:
|
||||||
|
Improve logging and checking of potential failure situations.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
Replication configuration files (PostgreSQL 11 and earlier:
|
||||||
|
<filename>recovery.conf</filename>; PostgreSQL 12 and later: <filename>postgresql.auto.conf</filename>)
|
||||||
|
will be checked to ensure they are owned by the same user who owns the PostgreSQL
|
||||||
|
data directory.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
Provide additional information in <option>--dry-run</option> mode output.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
Checks that the demotion candidate's registered repmgr.conf file can be found, to
|
||||||
|
prevent confusing references to an incorrectly configured data directory. GitHub #615.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-check"><command>repmgr node check</command></link>:
|
||||||
|
accept option <option>-S</option>/<option>--superuser</option>. GitHub #621.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-node-check"><command>repmgr node check</command></link>:
|
||||||
|
add <option>--upstream</option> option to check whether the node is attached
|
||||||
|
to the expected upstream node.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2>
|
||||||
|
<title>Bug fixes</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Ensure <link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link>
|
||||||
|
checks for available replication slots on the rejoin target.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-follow"><command>repmgr standby follow</command></link> and
|
||||||
|
<link linkend="repmgr-node-rejoin"><command>repmgr node rejoin</command></link> will now return
|
||||||
|
an error code if the operation fails because a replication slot is not available or cannot
|
||||||
|
be created on the follow/rejoin target.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>:
|
||||||
|
in <option>--dry-run</option> mode, display promote command which will be executed.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-promote"><command>repmgr standby promote</command></link>
|
||||||
|
will check if the <literal>repmgr</literal> user has permission to execute
|
||||||
|
<function>pg_promote()</function> and fall back to <command>pg_ctl promote</command> if
|
||||||
|
necessary.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
<link linkend="repmgr-standby-switchover"><command>repmgr standby switchover</command></link>:
|
||||||
|
check for demotion candidate reattachment as late as possible to avoid spurious failure
|
||||||
|
reports.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
&repmgrd;: check for presence of <option>promote_command</option> and
|
||||||
|
<option>follow_command</option> on receipt of <literal>SIGHUP</literal>. GitHub #614.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Fix situation where replication connections were not created correctly, which
|
||||||
|
could lead to spurious replication connection failures in some situations, e.g.
|
||||||
|
where password files are used.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Ensure <filename>postgresql.auto.conf</filename> is created with
|
||||||
|
correct permissions (PostgreSQL 12 and later).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="release-5.0">
|
<sect1 id="release-5.0">
|
||||||
<title id="release-current">Release 5.0</title>
|
<title>Release 5.0</title>
|
||||||
<para><emphasis>?? September ??, 2019</emphasis></para>
|
<para><emphasis>Tue 15 October, 2019</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 5.0 is a major release.
|
&repmgr; 5.0 is a major release.
|
||||||
@@ -167,6 +360,10 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
function, which enables a standby to be promoted to primary using an SQL
|
function, which enables a standby to be promoted to primary using an SQL
|
||||||
command.
|
command.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
For an overview of general changes to replication configuration, see this blog entry:
|
||||||
|
<ulink url="https://www.2ndquadrant.com/en/blog/replication-configuration-changes-in-postgresql-12/">Replication configuration changes in PostgreSQL 12</ulink>
|
||||||
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
@@ -261,7 +458,7 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
|
|
||||||
<sect1 id="release-4.4">
|
<sect1 id="release-4.4">
|
||||||
<title>Release 4.4</title>
|
<title>Release 4.4</title>
|
||||||
<para><emphasis>27 June, 2019</emphasis></para>
|
<para><emphasis>Thu 27 June, 2019</emphasis></para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
&repmgr; 4.4 is a major release.
|
&repmgr; 4.4 is a major release.
|
||||||
|
|||||||
@@ -43,6 +43,12 @@
|
|||||||
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
PostgreSQL version
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
&repmgr; version
|
&repmgr; version
|
||||||
@@ -68,9 +74,18 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
PostgreSQL version
|
PostgreSQL 11 and earlier: contents of the <filename>recovery.conf</filename> file
|
||||||
|
(suitably anonymized if necessary).
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
PostgreSQL 12 and later: contents of the <filename>postgresql.auto.conf</filename> file
|
||||||
|
(suitably anonymized if necessary), and whether or not the PostgreSQL data directory
|
||||||
|
contains the files <filename>standby.signal</filename> and/or <filename>recovery.signal</filename>.
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
@@ -90,8 +105,8 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
In all cases it is <emphasis>extremely</emphasis> useful to receive
|
In all cases it is <emphasis>extremely</emphasis> useful to receive
|
||||||
information on how to reliably reproduce an issue with as much detail as
|
as much detail as possible on how to reliably reproduce
|
||||||
possible.
|
an issue.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
BDR failover with repmgrd
|
|
||||||
=========================
|
|
||||||
|
|
||||||
This document has been integrated into the main `repmgr` documentation
|
|
||||||
and is now located here:
|
|
||||||
|
|
||||||
> [BDR failover with repmgrd](https://repmgr.org/docs/current/repmgrd-bdr.html)
|
|
||||||
|
|
||||||
@@ -64,8 +64,9 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Barman's parallel restore facility can be used by executing it manually on
|
Barman's parallel restore facility can be used by executing it manually on
|
||||||
the Barman server and integrating the resulting cloned standby using
|
the Barman server and configuring replication on the resulting cloned
|
||||||
<command><link linkend="repmgr-standby-clone">repmgr standby clone --recovery-conf-only</link></command>.
|
standby using
|
||||||
|
<command><link linkend="repmgr-standby-clone">repmgr standby clone --replication-conf-only</link></command>.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
@@ -103,15 +104,28 @@
|
|||||||
<filename>repmgr.conf</filename> should contain the following entries:
|
<filename>repmgr.conf</filename> should contain the following entries:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
barman_host='barman@barmansrv'
|
barman_host='barman@barmansrv'
|
||||||
barman_server='somedb'</programlisting>
|
barman_server='pg'</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Here <literal>pg</literal> corresponds to a section in Barman's configuration file for a specific
|
||||||
|
server backup configuration, which would look something like:
|
||||||
|
<programlisting>
|
||||||
|
[pg]
|
||||||
|
description = "Main cluster"
|
||||||
|
...
|
||||||
|
</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
More details on Barman configuration can be found in the
|
||||||
|
<ulink url="https://docs.pgbarman.org/">Barman documentation</ulink>'s
|
||||||
|
<ulink url="https://docs.pgbarman.org/#configuration">configuration section</ulink>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
To use a non-default Barman configuration file on the Barman server,
|
To use a non-default Barman configuration file on the Barman server,
|
||||||
specify this in <filename>repmgr.conf</filename> with <filename>barman_config</filename>:
|
specify this in <filename>repmgr.conf</filename> with <filename>barman_config</filename>:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
barman_config=/path/to/barman.conf</programlisting>
|
barman_config='/path/to/barman.conf'</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
@@ -189,8 +203,8 @@
|
|||||||
<filename>repmgr.conf</filename> should include the following lines:
|
<filename>repmgr.conf</filename> should include the following lines:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
barman_host='barman@barmansrv'
|
barman_host='barman@barmansrv'
|
||||||
barman_server='somedb'
|
barman_server='pg'
|
||||||
restore_command='/usr/bin/barman-wal-restore barmansrv somedb %f %p'</programlisting>
|
restore_command='/usr/bin/barman-wal-restore barmansrv pg %f %p'</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<simpara>
|
<simpara>
|
||||||
@@ -444,10 +458,8 @@
|
|||||||
|
|
||||||
<para>
|
<para>
|
||||||
The recommended way to do this is to store the password in the <literal>postgres</literal> system
|
The recommended way to do this is to store the password in the <literal>postgres</literal> system
|
||||||
user's <filename>~/.pgpass</filename> file. It's also possible to store the password in the
|
user's <filename>~/.pgpass</filename> file. For more information on using the password file, see
|
||||||
environment variable <varname>PGPASSWORD</varname>, however this is not recommended for
|
the documentation section <xref linkend="configuration-password-file"/>.
|
||||||
security reasons. For more details see the
|
|
||||||
<ulink url="https://www.postgresql.org/docs/current/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
@@ -469,19 +481,6 @@
|
|||||||
will need to be set during any action which causes <filename>recovery.conf</filename> to be
|
will need to be set during any action which causes <filename>recovery.conf</filename> to be
|
||||||
rewritten, e.g. <xref linkend="repmgr-standby-follow"/>.
|
rewritten, e.g. <xref linkend="repmgr-standby-follow"/>.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
It is of course also possible to include the password value in the <varname>conninfo</varname>
|
|
||||||
string for each node, but this is obviously a security risk and should be avoided.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
From PostgreSQL 9.6, <application>libpq</application> supports the <varname>passfile</varname>
|
|
||||||
parameter in connection strings, which can be used to specify a password file other than
|
|
||||||
the default <filename>~/.pgpass</filename>.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
To have &repmgr; write a custom password file in <varname>primary_conninfo</varname>,
|
|
||||||
specify its location in <varname>passfile</varname> in <filename>repmgr.conf</filename>.
|
|
||||||
</para>
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="cloning-advanced-replication-user" xreflabel="Separate replication user">
|
<sect2 id="cloning-advanced-replication-user" xreflabel="Separate replication user">
|
||||||
@@ -497,6 +496,34 @@
|
|||||||
cloning a node or executing <xref linkend="repmgr-standby-follow"/>.
|
cloning a node or executing <xref linkend="repmgr-standby-follow"/>.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
<sect2 id="cloning-advanced-tablespace-mapping" xreflabel="Tablespace mapping">
|
||||||
|
<title>Tablespace mapping</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>tablespace mapping</primary>
|
||||||
|
</indexterm>
|
||||||
|
<para>
|
||||||
|
&repmgr; provides a <option>tablespace_mapping</option> configuration
|
||||||
|
file option, which makes it possible to map the tablespace on the source node to
|
||||||
|
a different location on the local node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
To use this, add <option>tablespace_mapping</option> to <filename>repmgr.conf</filename>
|
||||||
|
like this:
|
||||||
|
<programlisting>
|
||||||
|
tablespace_mapping='/var/lib/pgsql/tblspc1=/data/pgsql/tblspc1'
|
||||||
|
</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
where the left-hand value represents the tablespace on the source node,
|
||||||
|
and the right-hand value represents the tablespace on the standby to be cloned.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This parameter can be provided multiple times.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -66,17 +66,8 @@
|
|||||||
</term>
|
</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Must be one of <literal>physical</literal> (for standard streaming replication)
|
Must be <literal>physical</literal> (the default).
|
||||||
or <literal>bdr</literal>.
|
|
||||||
</para>
|
</para>
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
Replication type <literal>bdr</literal> can only be used with BDR 2.x
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
BDR 3.x users should use <literal>physical</literal>.
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@@ -119,6 +110,27 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry id="repmgr-conf-ssh-options" xreflabel="ssh_options">
|
||||||
|
<term><varname>ssh_options</varname> (<type>string</type>)
|
||||||
|
<indexterm>
|
||||||
|
<primary><varname>ssh_options</varname> configuration file parameter</primary>
|
||||||
|
</indexterm>
|
||||||
|
</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Options to append to the <command>ssh</command> command when executed
|
||||||
|
by &repmgr;.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
We recommend adding <literal>-q</literal> to suppress any superfluous
|
||||||
|
SSH chatter such as login banners, and also an explicit
|
||||||
|
<option>ConnectTimeout</option> value,
|
||||||
|
e.g.:
|
||||||
|
<programlisting>
|
||||||
|
ssh_options='-q -o ConnectTimeout=10'</programlisting>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -182,6 +182,14 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
In examples provided in this documentation, it is assumed the configuration file is located
|
||||||
|
at <filename>/etc/repmgr.conf</filename>. If &repmgr; is installed from a package, the
|
||||||
|
configuration file will probably be located at another location specified by the packager;
|
||||||
|
see appendix <xref linkend="appendix-packages"/> for configuration file locations in
|
||||||
|
different packaging systems.
|
||||||
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
Note that if a file is explicitly specified with <literal>-f/--config-file</literal>,
|
||||||
an error will be raised if it is not found or not readable, and no attempt will be made to
|
an error will be raised if it is not found or not readable, and no attempt will be made to
|
||||||
@@ -202,6 +210,61 @@ conninfo='host=node1 user=repmgr dbname=repmgr connect_timeout=2'</programlistin
|
|||||||
<filename>/path/to/repmgr.conf</filename>).
|
<filename>/path/to/repmgr.conf</filename>).
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="configuration-file-postgresql-major-upgrades" xreflabel="configuration file and PostgreSQL major version upgrades">
|
||||||
|
<title>Configuration file and PostgreSQL major version upgrades</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>repmgr.conf</primary>
|
||||||
|
<secondary>PostgreSQL major version upgrades</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
When upgrading the PostgreSQL cluster to a new major version, <filename>repmgr.conf</filename>
|
||||||
|
will probably need to be updated.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Usually <option>pg_bindir</option> and <option>data_directory</option> will need to be modified,
|
||||||
|
particularly if the default package locations are used, as these usually change.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
It's also possible the location of <filename>repmgr.conf</filename> itself will change
|
||||||
|
(e.g. from <filename>/etc/repmgr/11/repmgr.conf</filename> to <filename>/etc/repmgr/12/repmgr.conf</filename>).
|
||||||
|
This is stored as part of the &repmgr; metadata and is used by &repmgr; to execute &repmgr; remotely
|
||||||
|
(e.g. during a <link linkend="performing-switchover">switchover operation</link>).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the content and/or location of <filename>repmgr.conf</filename> has changed, the &repmgr; metadata
|
||||||
|
needs to be updated to reflect this. The &repmgr; metadata can be updated on each node with:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-primary-register">
|
||||||
|
<command>repmgr primary register --force -f /path/to/repmgr.conf</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-standby-register">
|
||||||
|
<command>repmgr standby register --force -f /path/to/repmgr.conf</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<link linkend="repmgr-witness-register">
|
||||||
|
<command>repmgr witness register --force -f /path/to/repmgr.conf -h primary_host</command>
|
||||||
|
</link>
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
175
doc/configuration-password-management.xml
Normal file
175
doc/configuration-password-management.xml
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
<sect1 id="configuration-password-management" xreflabel="password management">
|
||||||
|
|
||||||
|
<title>Password Management</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>passwords</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<sect2 id="configuration-password-management-options" xreflabel="password management options">
|
||||||
|
<title>Password Management Options</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>passwords</primary>
|
||||||
|
<secondary>options for managing</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
For security purposes it's desirable to protect database access using a password.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL has three ways of providing a password:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
including the password in the <option>conninfo</option> string
|
||||||
|
(e.g. "<literal>host=node1 dbname=repmgr user=repmgr password=foo</literal>")
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
exporting the password as an environment variable (<envar>PGPASSWORD</envar>)
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
storing the password in a dedicated password file
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
We strongly advise against including the password in the <option>conninfo</option> string, as
|
||||||
|
this will result in the database password being exposed in various places, including in the
|
||||||
|
<filename>repmgr.conf</filename> file, the <literal>repmgr.nodes</literal> table, any output
|
||||||
|
generated by &repmgr; which lists the node <option>conninfo</option> strings (e.g.
|
||||||
|
<link linkend="repmgr-cluster-show">repmgr cluster show</link>) and in the &repmgr; log file,
|
||||||
|
particularly at <option>log_level=DEBUG</option>.
|
||||||
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
Currently &repmgr; does not fully support use of the <option>password</option> option in the
|
||||||
|
<option>conninfo</option> string.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
<para>
|
||||||
|
Exporting the password as an environment variable (<envar>PGPASSWORD</envar>) is considered
|
||||||
|
less insecure, but the PostgreSQL documentation explicitly recommends against doing this:
|
||||||
|
<blockquote>
|
||||||
|
<attribution><ulink url="https://www.postgresql.org/docs/current/libpq-envars.html">Environment Variables</ulink></attribution>
|
||||||
|
<para>
|
||||||
|
<envar>PGPASSWORD</envar> behaves the same as the <option>password</option>
|
||||||
|
connection parameter. Use of this environment variable
|
||||||
|
is not recommended for security reasons, as some operating systems
|
||||||
|
allow non-root users to see process environment variables via
|
||||||
|
<application>ps</application>; instead consider using a password file.
|
||||||
|
</para>
|
||||||
|
</blockquote>
|
||||||
|
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The most secure option for managing passwords is to use a dedicated password file; see the following
|
||||||
|
section for more details.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="configuration-password-file" xreflabel="password file">
|
||||||
|
<title>Using a password file</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>pgpass</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>.pgpass</primary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>passwords</primary>
|
||||||
|
<secondary>using a password file</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The most secure way of storing passwords is in a password file,
|
||||||
|
which by default is <filename>~/.pgpass</filename>. This file
|
||||||
|
can only be read by the system user who owns the file, and
|
||||||
|
PostgreSQL will refuse to use the file unless read/write
|
||||||
|
permissions are restricted to the file owner. The password(s)
|
||||||
|
contained in the file will not be directly accessed by
|
||||||
|
&repmgr; (or any other libpq-based client software such as <application>psql</application>).
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For full details see the
|
||||||
|
<ulink url="https://www.postgresql.org/docs/current/libpq-pgpass.html">PostgreSQL password file documentation</ulink>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For use with &repmgr;, the <filename>~/.pgpass</filename> must contain two entries for each
|
||||||
|
node in the replication cluster: one for the &repmgr; user who accesses the &repmgr; metadatabase,
|
||||||
|
and one for replication connections (regardless of whether a dedicated replication user is used).
|
||||||
|
The file must be present on each node in the replication cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
A <filename>~/.pgpass</filename> file for a 3-node cluster where the <literal>repmgr</literal> database user
|
||||||
|
is used both for accessing the &repmgr; metadatabase and for replication connections would look like this:
|
||||||
|
<programlisting>
|
||||||
|
node1:5432:repmgr:repmgr:foo
|
||||||
|
node1:5432:replication:repmgr:foo
|
||||||
|
node2:5432:repmgr:repmgr:foo
|
||||||
|
node2:5432:replication:repmgr:foo
|
||||||
|
node3:5432:repmgr:repmgr:foo
|
||||||
|
node3:5432:replication:repmgr:foo</programlisting>
|
||||||
|
If a dedicated replication user (here: <literal>repluser</literal>) is in use, the file would look like this:
|
||||||
|
<programlisting>
|
||||||
|
node1:5432:repmgr:repmgr:foo
|
||||||
|
node1:5432:replication:repluser:foo
|
||||||
|
node2:5432:repmgr:repmgr:foo
|
||||||
|
node2:5432:replication:repluser:foo
|
||||||
|
node3:5432:repmgr:repmgr:foo
|
||||||
|
node3:5432:replication:repluser:foo</programlisting>
|
||||||
|
If you are planning to use the <option>-S</option>/<option>--superuser</option> option,
|
||||||
|
there must also be an entry enabling the superuser to connect to the &repmgr; database.
|
||||||
|
Assuming the superuser is <literal>postgres</literal>, the file would look like this:
|
||||||
|
<programlisting>
|
||||||
|
node1:5432:repmgr:repmgr:foo
|
||||||
|
node1:5432:repmgr:postgres:foo
|
||||||
|
node1:5432:replication:repluser:foo
|
||||||
|
node2:5432:repmgr:repmgr:foo
|
||||||
|
node2:5432:repmgr:postgres:foo
|
||||||
|
node2:5432:replication:repluser:foo
|
||||||
|
node3:5432:repmgr:repmgr:foo
|
||||||
|
node3:5432:repmgr:postgres:foo
|
||||||
|
node3:5432:replication:repluser:foo</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The <filename>~/.pgpass</filename> file can be simplified with the use of wildcards if
|
||||||
|
there is no requirement to restrict provision of passwords to particular hosts, ports
|
||||||
|
or databases. The preceding file could then be formatted like this:
|
||||||
|
<programlisting>
|
||||||
|
*:*:*:repmgr:foo
|
||||||
|
*:*:*:postgres:foo
|
||||||
|
</programlisting>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
It's possible to specify an alternative location for the <filename>~/.pgpass</filename> file, either via
|
||||||
|
the environment variable <envar>PGPASSFILE</envar>, or (from PostgreSQL 9.6) using the
|
||||||
|
<varname>passfile</varname> parameter in connection strings.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If using the <varname>passfile</varname> parameter, it's essential to ensure the file is in the same
|
||||||
|
location on all nodes, as when connecting to a remote node, the file referenced is the one on the
|
||||||
|
local node.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Additionally, you <emphasis>must</emphasis> specify the passfile location in <filename>repmgr.conf</filename>
|
||||||
|
with the <option>passfile</option> option so &repmgr; can write the correct path when creating the
|
||||||
|
<option>primary_conninfo</option> parameter for replication configuration on standbys.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
</sect1>
|
||||||
25
doc/configuration-permissions.xml
Normal file
25
doc/configuration-permissions.xml
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
<sect1 id="configuration-permissions" xreflabel="Database user permissions">
|
||||||
|
<title>repmgr database user permissions</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>configuration</primary>
|
||||||
|
<secondary>database user permissions</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
&repmgr; requires that the database defined in the <varname>conninfo</varname>
|
||||||
|
setting contains the <literal>repmgr</literal> extension. The database user defined in the
|
||||||
|
<varname>conninfo</varname> setting must be able to access this database and
|
||||||
|
the database objects contained within the extension.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
The <literal>repmgr</literal> extension can only be installed by a superuser.
|
||||||
|
If the &repmgr; user is a superuser, &repmgr; will create the extension automatically.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Alternatively, the extension can be created manually by a superuser
|
||||||
|
(with "<command>CREATE EXTENSION repmgr</command>") before executing
|
||||||
|
<link linkend="repmgr-primary-register">repmgr primary register</link>.
|
||||||
|
</para>
|
||||||
|
</sect1>
|
||||||
@@ -318,23 +318,7 @@
|
|||||||
&configuration-file-optional-settings;
|
&configuration-file-optional-settings;
|
||||||
&configuration-file-log-settings;
|
&configuration-file-log-settings;
|
||||||
&configuration-file-service-commands;
|
&configuration-file-service-commands;
|
||||||
|
&configuration-permissions;
|
||||||
|
&configuration-password-management;
|
||||||
|
|
||||||
<sect1 id="configuration-permissions" xreflabel="Database user permissions">
|
|
||||||
<title>repmgr database user permissions</title>
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>configuration</primary>
|
|
||||||
<secondary>database user permissions</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
&repmgr; will create an extension database containing objects
|
|
||||||
for administering &repmgr; metadata. The user defined in the <varname>conninfo</varname>
|
|
||||||
setting must be able to access all objects. Additionally, superuser permissions
|
|
||||||
are required to install the &repmgr; extension. The easiest way to do this
|
|
||||||
is create the &repmgr; user as a superuser, however if this is not
|
|
||||||
desirable, the &repmgr; user can be created as a normal user and a
|
|
||||||
superuser specified with <literal>--superuser</literal> when registering a &repmgr; node.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
</chapter>
|
||||||
|
|||||||
@@ -117,10 +117,6 @@
|
|||||||
<literal>conninfo</literal> string of the primary node
|
<literal>conninfo</literal> string of the primary node
|
||||||
(<xref linkend="repmgr-standby-register"/> and <xref linkend="repmgr-standby-follow"/>)
|
(<xref linkend="repmgr-standby-register"/> and <xref linkend="repmgr-standby-follow"/>)
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
<literal>conninfo</literal> string of the next available node
|
|
||||||
(<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@@ -130,9 +126,6 @@
|
|||||||
<para>
|
<para>
|
||||||
name of the current primary node (<xref linkend="repmgr-standby-register"/> and <xref linkend="repmgr-standby-follow"/>)
|
name of the current primary node (<xref linkend="repmgr-standby-register"/> and <xref linkend="repmgr-standby-follow"/>)
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
name of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
@@ -148,7 +141,7 @@
|
|||||||
the notification types can be filtered to explicitly named ones using the
|
the notification types can be filtered to explicitly named ones using the
|
||||||
<varname>event_notifications</varname> parameter, e.g.:
|
<varname>event_notifications</varname> parameter, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
event_notifications=primary_register,standby_register,witness_register</programlisting>
|
event_notifications='primary_register,standby_register,witness_register'</programlisting>
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@@ -273,28 +266,6 @@
|
|||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
|
||||||
Events generated by &repmgrd; (BDR mode):
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
<listitem>
|
|
||||||
<simpara><literal>bdr_failover</literal></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara><literal>bdr_reconnect</literal></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara><literal>bdr_recovery</literal></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara><literal>bdr_register</literal></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara><literal>bdr_unregister</literal></simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
</itemizedlist>
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Note that under some circumstances (e.g. when no replication cluster primary
|
Note that under some circumstances (e.g. when no replication cluster primary
|
||||||
could be located), it will not be possible to write an entry into the
|
could be located), it will not be possible to write an entry into the
|
||||||
|
|||||||
@@ -21,6 +21,8 @@
|
|||||||
<!ENTITY configuration-file-optional-settings SYSTEM "configuration-file-optional-settings.xml">
|
<!ENTITY configuration-file-optional-settings SYSTEM "configuration-file-optional-settings.xml">
|
||||||
<!ENTITY configuration-file-log-settings SYSTEM "configuration-file-log-settings.xml">
|
<!ENTITY configuration-file-log-settings SYSTEM "configuration-file-log-settings.xml">
|
||||||
<!ENTITY configuration-file-service-commands SYSTEM "configuration-file-service-commands.xml">
|
<!ENTITY configuration-file-service-commands SYSTEM "configuration-file-service-commands.xml">
|
||||||
|
<!ENTITY configuration-permissions SYSTEM "configuration-permissions.xml">
|
||||||
|
<!ENTITY configuration-password-management SYSTEM "configuration-password-management.xml">
|
||||||
<!ENTITY cloning-standbys SYSTEM "cloning-standbys.xml">
|
<!ENTITY cloning-standbys SYSTEM "cloning-standbys.xml">
|
||||||
<!ENTITY promoting-standby SYSTEM "promoting-standby.xml">
|
<!ENTITY promoting-standby SYSTEM "promoting-standby.xml">
|
||||||
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.xml">
|
<!ENTITY follow-new-primary SYSTEM "follow-new-primary.xml">
|
||||||
@@ -33,7 +35,6 @@
|
|||||||
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.xml">
|
<!ENTITY repmgrd-automatic-failover SYSTEM "repmgrd-automatic-failover.xml">
|
||||||
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.xml">
|
<!ENTITY repmgrd-configuration SYSTEM "repmgrd-configuration.xml">
|
||||||
<!ENTITY repmgrd-operation SYSTEM "repmgrd-operation.xml">
|
<!ENTITY repmgrd-operation SYSTEM "repmgrd-operation.xml">
|
||||||
<!ENTITY repmgrd-bdr SYSTEM "repmgrd-bdr.xml">
|
|
||||||
|
|
||||||
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.xml">
|
<!ENTITY repmgr-primary-register SYSTEM "repmgr-primary-register.xml">
|
||||||
<!ENTITY repmgr-primary-unregister SYSTEM "repmgr-primary-unregister.xml">
|
<!ENTITY repmgr-primary-unregister SYSTEM "repmgr-primary-unregister.xml">
|
||||||
|
|||||||
@@ -26,10 +26,18 @@
|
|||||||
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink>; see following
|
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink>; see following
|
||||||
section for details.
|
section for details.
|
||||||
</para>
|
</para>
|
||||||
|
<note>
|
||||||
|
<para>
|
||||||
|
Currently the <ulink url="https://2ndquadrant.com">2ndQuadrant</ulink>
|
||||||
|
<ulink url="https://dl.2ndquadrant.com/">public repository</ulink> provides
|
||||||
|
support for RedHat/CentOS versions 5, 6 and 7. Support for version 8 is
|
||||||
|
available via the PGDG repository; see below for details.
|
||||||
|
</para>
|
||||||
|
</note>
|
||||||
<para>
|
<para>
|
||||||
RPM packages for &repmgr; are also available via Yum through
|
RPM packages for &repmgr; are also available via Yum through
|
||||||
the PostgreSQL Global Development Group RPM repository
|
the PostgreSQL Global Development Group (PGDG) RPM repository
|
||||||
(<ulink url="https://yum.postgresql.org/">http://yum.postgresql.org/</ulink>).
|
(<ulink url="https://yum.postgresql.org/">https://yum.postgresql.org/</ulink>).
|
||||||
Follow the instructions for your distribution (RedHat, CentOS,
|
Follow the instructions for your distribution (RedHat, CentOS,
|
||||||
Fedora, etc.) and architecture as detailed there. Note that it can take some days
|
Fedora, etc.) and architecture as detailed there. Note that it can take some days
|
||||||
for new &repmgr; packages to become available via this repository.
|
for new &repmgr; packages to become available via this repository.
|
||||||
@@ -198,7 +206,7 @@ repmgr11.x86_64 4.4-1.el7 2nd
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<para>.deb packages for &repmgr; are available from the
|
<para>.deb packages for &repmgr; are available from the
|
||||||
PostgreSQL Community APT repository (<ulink url="http://apt.postgresql.org/">http://apt.postgresql.org/</ulink>).
|
PostgreSQL Community APT repository (<ulink url="https://apt.postgresql.org/">https://apt.postgresql.org/</ulink>).
|
||||||
Instructions can be found in the APT section of the PostgreSQL Wiki
|
Instructions can be found in the APT section of the PostgreSQL Wiki
|
||||||
(<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink>).
|
(<ulink url="https://wiki.postgresql.org/wiki/Apt">https://wiki.postgresql.org/wiki/Apt</ulink>).
|
||||||
</para>
|
</para>
|
||||||
|
|||||||
@@ -99,6 +99,9 @@
|
|||||||
<entry>
|
<entry>
|
||||||
&repmgr; version
|
&repmgr; version
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry>
|
||||||
|
Supported?
|
||||||
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
Latest release
|
Latest release
|
||||||
</entry>
|
</entry>
|
||||||
@@ -114,6 +117,9 @@
|
|||||||
<entry>
|
<entry>
|
||||||
&repmgr; 5.x
|
&repmgr; 5.x
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry>
|
||||||
|
YES
|
||||||
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
<link linkend="release-current">&repmgrversion;</link> (&releasedate;)
|
<link linkend="release-current">&repmgrversion;</link> (&releasedate;)
|
||||||
</entry>
|
</entry>
|
||||||
@@ -127,7 +133,10 @@
|
|||||||
&repmgr; 4.x
|
&repmgr; 4.x
|
||||||
</entry>
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
<link linkend="release-4.4">4.4</link> (27 June, 2019)
|
NO
|
||||||
|
</entry>
|
||||||
|
<entry>
|
||||||
|
<link linkend="release-4.4">4.4</link> (2019-06-27)
|
||||||
</entry>
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
9.3, 9.4, 9.5, 9.6, 10, 11
|
9.3, 9.4, 9.5, 9.6, 10, 11
|
||||||
@@ -138,6 +147,9 @@
|
|||||||
<entry>
|
<entry>
|
||||||
&repmgr; 3.x
|
&repmgr; 3.x
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry>
|
||||||
|
NO
|
||||||
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
<ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
|
<ulink url="https://repmgr.org/release-notes-3.3.2.html">3.3.2</ulink> (2017-05-30)
|
||||||
</entry>
|
</entry>
|
||||||
@@ -150,6 +162,9 @@
|
|||||||
<entry>
|
<entry>
|
||||||
&repmgr; 2.x
|
&repmgr; 2.x
|
||||||
</entry>
|
</entry>
|
||||||
|
<entry>
|
||||||
|
NO
|
||||||
|
</entry>
|
||||||
<entry>
|
<entry>
|
||||||
<ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
|
<ulink url="https://repmgr.org/release-notes-2.0.3.html">2.0.3</ulink> (2015-04-16)
|
||||||
</entry>
|
</entry>
|
||||||
@@ -167,8 +182,23 @@
|
|||||||
The &repmgr; 2.x and 3.x series are no longer maintained or supported.
|
The &repmgr; 2.x and 3.x series are no longer maintained or supported.
|
||||||
We strongly recommend upgrading to the latest &repmgr; version.
|
We strongly recommend upgrading to the latest &repmgr; version.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
Following the release of &repmgr; 5.0, there will be no further releases of
|
||||||
|
the &repmgr; 4.x series. Note that &repmgr; 5.x is an incremental development
|
||||||
|
of the 4.x series and &repmgr; 4.x users should upgrade to this as soon as possible.
|
||||||
|
</para>
|
||||||
</important>
|
</important>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="install-postgresql-93-94">
|
||||||
|
|
||||||
|
<title>PostgreSQL 9.3 and 9.4 support</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>PostgreSQL 9.3</primary>
|
||||||
|
<secondary>repmgr support</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4:
|
Note that some &repmgr; functionality is not available in PostgreSQL 9.3 and PostgreSQL 9.4:
|
||||||
@@ -190,5 +220,26 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
|
|
||||||
|
<important>
|
||||||
|
<para>
|
||||||
|
PostgreSQL 9.3 has reached the end of its community support period (final release was
|
||||||
|
<ulink url="https://www.postgresql.org/docs/9.3/release-9-3-25.html">9.3.25</ulink>
|
||||||
|
in November 2018) and will no longer be updated with security or bugfixes.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
PostgreSQL 9.4 has reached the end of its community support period (final release was
|
||||||
|
<ulink url="https://www.postgresql.org/docs/9.4/release-9-4-26.html">9.4.26</ulink>
|
||||||
|
in February 2020) and will no longer be updated with security or bugfixes.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
We recommend that users of these versions migrate to a recent PostgreSQL version
|
||||||
|
as soon as possible.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
For further details, see the <ulink url="https://www.postgresql.org/support/versioning/">PostgreSQL Versioning Policy</ulink>.
|
||||||
|
</para>
|
||||||
|
</important>
|
||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|||||||
@@ -24,8 +24,7 @@
|
|||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
<literal>Debian</literal> and <literal>Ubuntu</literal>: First
|
<literal>Debian</literal> and <literal>Ubuntu</literal>: First
|
||||||
add the <ulink
|
add the <ulink url="https://apt.postgresql.org/">apt.postgresql.org</ulink>
|
||||||
url="http://apt.postgresql.org/">apt.postgresql.org</ulink>
|
|
||||||
repository to your <filename>sources.list</filename> if you
|
repository to your <filename>sources.list</filename> if you
|
||||||
have not already done so, and ensure the source repository is enabled.
|
have not already done so, and ensure the source repository is enabled.
|
||||||
</para>
|
</para>
|
||||||
@@ -36,8 +35,8 @@
|
|||||||
line in the repository file, which is usually
|
line in the repository file, which is usually
|
||||||
<filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
|
<filename>/etc/apt/sources.list.d/pgdg.list</filename>, e.g.:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
deb http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
|
deb https://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main
|
||||||
deb-src http://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
|
deb-src https://apt.postgresql.org/pub/repos/apt/ stretch-pgdg main</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
<date>2017</date>
|
<date>2017</date>
|
||||||
|
|
||||||
<copyright>
|
<copyright>
|
||||||
<year>2010-2019</year>
|
<year>2010-2020</year>
|
||||||
<holder>2ndQuadrant, Ltd.</holder>
|
<holder>2ndQuadrant, Ltd.</holder>
|
||||||
</copyright>
|
</copyright>
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@
|
|||||||
<title>Legal Notice</title>
|
<title>Legal Notice</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<productname>repmgr</productname> is Copyright © 2010-2019
|
<productname>repmgr</productname> is Copyright © 2010-2020
|
||||||
by 2ndQuadrant, Ltd. All rights reserved.
|
by 2ndQuadrant, Ltd. All rights reserved.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|||||||
@@ -167,7 +167,7 @@
|
|||||||
For the sake of simplicity, the <literal>repmgr</literal> user is created
|
For the sake of simplicity, the <literal>repmgr</literal> user is created
|
||||||
as a superuser. If desired, it's possible to create the <literal>repmgr</literal>
|
as a superuser. If desired, it's possible to create the <literal>repmgr</literal>
|
||||||
user as a normal user. However for certain operations superuser permissions
|
user as a normal user. However for certain operations superuser permissions
|
||||||
are requiredl; in this case the command line option <command>--superuser</command>
|
are required; in this case the command line option <command>--superuser</command>
|
||||||
can be provided to specify a superuser.
|
can be provided to specify a superuser.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
<para>
|
<para>
|
||||||
Displays information about each registered node in the replication cluster. This
|
Displays information about each registered node in the replication cluster. This
|
||||||
command polls each registered server and shows its role (<literal>primary</literal> /
|
command polls each registered server and shows its role (<literal>primary</literal> /
|
||||||
<literal>standby</literal> / <literal>bdr</literal>) and status. It polls each server
|
<literal>standby</literal>) and status. It polls each server
|
||||||
directly and can be run on any node in the cluster; this is also useful when analyzing
|
directly and can be run on any node in the cluster; this is also useful when analyzing
|
||||||
connectivity from a particular node.
|
connectivity from a particular node.
|
||||||
</para>
|
</para>
|
||||||
@@ -53,12 +53,13 @@
|
|||||||
<para>
|
<para>
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
$ repmgr -f /etc/repmgr.conf cluster show
|
||||||
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
|
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
|
||||||
----+-------+---------+-----------+----------+----------+----------+-----------------------------------------
|
----+-------+---------+-----------+----------+----------+----------+----------+-----------------------------------------
|
||||||
1 | node1 | primary | * running | | default | 100 | 1 | host=db_node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | * running | | default | 100 | 1 | host=db_node1 dbname=repmgr user=repmgr
|
||||||
2 | node2 | standby | running | node1 | default | 100 | 1 | host=db_node2 dbname=repmgr user=repmgr
|
2 | node2 | standby | running | node1 | default | 100 | 1 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
3 | node3 | standby | running | node1 | default | 100 | 1 | host=db_node3 dbname=repmgr user=repmgr</programlisting>
|
3 | node3 | standby | running | node1 | default | 100 | 1 | host=db_node3 dbname=repmgr user=repmgr
|
||||||
|
4 | node4 | standby | running | node1 | default | 100 | 1 | host=db_node4 dbname=repmgr user=repmgr
|
||||||
|
5 | node5 | witness | * running | node1 | default | 0 | n/a | host=db_node5 dbname=repmgr user=repmgr</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -82,18 +83,22 @@
|
|||||||
(but <literal>node3</literal> is not attached to it, and its metadata has not yet been updated);
|
(but <literal>node3</literal> is not attached to it, and its metadata has not yet been updated);
|
||||||
<literal>node4</literal> is running but rejecting connections (from <literal>node3</literal> at least).
|
<literal>node4</literal> is running but rejecting connections (from <literal>node3</literal> at least).
|
||||||
<programlisting>
|
<programlisting>
|
||||||
ID | Name | Role | Status | Upstream | Location | Priority | Connection string
|
ID | Name | Role | Status | Upstream | Location | Priority | Timeline | Connection string
|
||||||
----+-------+---------+----------------------+----------+----------+----------+-----------------------------------------
|
----+-------+---------+----------------------+----------+----------+----------+----------+----------------------------------------------------
|
||||||
1 | node1 | primary | ? unreachable | | default | 100 | host=db_node1 dbname=repmgr user=repmgr
|
1 | node1 | primary | ? unreachable | | default | 100 | | host=db_node1 dbname=repmgr user=repmgr
|
||||||
2 | node2 | standby | ! running as primary | node1 | default | 100 | host=db_node2 dbname=repmgr user=repmgr
|
2 | node2 | standby | ! running as primary | ? node1 | default | 100 | 2 | host=db_node2 dbname=repmgr user=repmgr
|
||||||
3 | node3 | standby | running | node1 | default | 100 | host=db_node3 dbname=repmgr user=repmgr
|
3 | node3 | standby | running | ? node1 | default | 100 | 1 | host=db_node3 dbname=repmgr user=repmgr
|
||||||
4 | node4 | standby | ? running | node1 | default | 100 | host=db_node4 dbname=repmgr user=repmgr
|
4 | node4 | standby | ? running | ? node1 | default | 100 | | host=db_node4 dbname=repmgr user=repmgr
|
||||||
|
|
||||||
WARNING: following issues were detected
|
WARNING: following issues were detected
|
||||||
- unable to connect to node "node1" (ID: 1)
|
- unable to connect to node "node1" (ID: 1)
|
||||||
- node "node1" (ID: 1) is registered as an active primary but is unreachable
|
- node "node1" (ID: 1) is registered as an active primary but is unreachable
|
||||||
- node "node2" (ID: 2) is registered as standby but running as primary
|
- node "node2" (ID: 2) is registered as standby but running as primary
|
||||||
- unable to connect to node "node4" (ID: 4)
|
- unable to connect to node "node2" (ID: 2)'s upstream node "node1" (ID: 1)
|
||||||
|
- unable to determine if node "node2" (ID: 2) is attached to its upstream node "node1" (ID: 1)
|
||||||
|
- unable to connect to node "node3" (ID: 3)'s upstream node "node1" (ID: 1)
|
||||||
|
- unable to determine if node "node3" (ID: 3) is attached to its upstream node "node1" (ID: 1)
|
||||||
|
- unable to connect to node "node4" (ID: 4)
|
||||||
HINT: execute with --verbose option to see connection error messages</programlisting>
|
HINT: execute with --verbose option to see connection error messages</programlisting>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -31,15 +31,32 @@
|
|||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Example</title>
|
<title>Example</title>
|
||||||
<para>
|
<para>
|
||||||
|
Execution on the primary server:
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr -f /etc/repmgr.conf node check
|
$ repmgr -f /etc/repmgr.conf node check
|
||||||
Node "node1":
|
Node "node1":
|
||||||
Server role: OK (node is primary)
|
Server role: OK (node is primary)
|
||||||
Replication lag: OK (N/A - node is primary)
|
Replication lag: OK (N/A - node is primary)
|
||||||
WAL archiving: OK (0 pending files)
|
WAL archiving: OK (0 pending files)
|
||||||
|
Upstream connection: OK (N/A - is primary)
|
||||||
Downstream servers: OK (2 of 2 downstream nodes attached)
|
Downstream servers: OK (2 of 2 downstream nodes attached)
|
||||||
Replication slots: OK (node has no physical replication slots)
|
Replication slots: OK (node has no physical replication slots)
|
||||||
Missing replication slots: OK (node has no missing physical replication slots)</programlisting>
|
Missing replication slots: OK (node has no missing physical replication slots)
|
||||||
|
Configured data directory: OK (configured "data_directory" is "/var/lib/postgresql/data")</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Execution on a standby server:
|
||||||
|
<programlisting>
|
||||||
|
$ repmgr -f /etc/repmgr.conf node check
|
||||||
|
Node "node2":
|
||||||
|
Server role: OK (node is standby)
|
||||||
|
Replication lag: OK (0 seconds)
|
||||||
|
WAL archiving: OK (0 pending archive ready files)
|
||||||
|
Upstream connection: OK (node "node2" (ID: 2) is attached to expected upstream node "node1" (ID: 1))
|
||||||
|
Downstream servers: OK (this node has no downstream nodes)
|
||||||
|
Replication slots: OK (node has no physical replication slots)
|
||||||
|
Missing physical replication slots: OK (node has no missing physical replication slots)
|
||||||
|
Configured data directory: OK (configured "data_directory" is "/var/lib/postgresql/data")</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
@@ -57,20 +74,20 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--role</literal>: checks if the node has the expected role
|
<option>--role</option>: checks if the node has the expected role
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--replication-lag</literal>: checks if the node is lagging by more than
|
<option>--replication-lag</option>: checks if the node is lagging by more than
|
||||||
<varname>replication_lag_warning</varname> or <varname>replication_lag_critical</varname>
|
<varname>replication_lag_warning</varname> or <varname>replication_lag_critical</varname>
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--archive-ready</literal>: checks for WAL files which have not yet been archived,
|
<option>--archive-ready</option>: checks for WAL files which have not yet been archived,
|
||||||
and returns <literal>WARNING</literal> or <literal>CRITICAL</literal> if the number
|
and returns <literal>WARNING</literal> or <literal>CRITICAL</literal> if the number
|
||||||
exceeds <varname>archive_ready_warning</varname> or <varname>archive_ready_critical</varname> respectively.
|
exceeds <varname>archive_ready_warning</varname> or <varname>archive_ready_critical</varname> respectively.
|
||||||
</simpara>
|
</simpara>
|
||||||
@@ -78,25 +95,31 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--downstream</literal>: checks that the expected downstream nodes are attached
|
<option>--downstream</option>: checks that the expected downstream nodes are attached
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--slots</literal>: checks there are no inactive physical replication slots
|
<option>--upstream</option>: checks that the node is attached to its expected upstream
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--missing-slots</literal>: checks there are no missing physical replication slots
|
<option>--slots</option>: checks there are no inactive physical replication slots
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--data-directory-config</literal>: checks the data directory configured in
|
<option>--missing-slots</option>: checks there are no missing physical replication slots
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<option>--data-directory-config</option>: checks the data directory configured in
|
||||||
<filename>repmgr.conf</filename> matches the actual data directory.
|
<filename>repmgr.conf</filename> matches the actual data directory.
|
||||||
This check is not directly related to replication, but is useful to verify &repmgr;
|
This check is not directly related to replication, but is useful to verify &repmgr;
|
||||||
is correctly configured.
|
is correctly configured.
|
||||||
@@ -108,6 +131,22 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>Connection options</title>
|
||||||
|
<para>
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
<option>-S</option>/<option>--superuser</option>: connect as the
|
||||||
|
named superuser instead of the &repmgr; user
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Output format</title>
|
<title>Output format</title>
|
||||||
<para>
|
<para>
|
||||||
@@ -115,14 +154,14 @@
|
|||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--csv</literal>: generate output in CSV format (not available
|
<option>--csv</option>: generate output in CSV format (not available
|
||||||
for individual checks)
|
for individual checks)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<literal>--nagios</literal>: generate output in a Nagios-compatible format
|
<option>--nagios</option>: generate output in a Nagios-compatible format
|
||||||
(for individual checks only)
|
(for individual checks only)
|
||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
@@ -130,13 +169,15 @@
|
|||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Exit codes</title>
|
<title>Exit codes</title>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
When executing <command>repmgr node check</command> with one of the individual
|
When executing <command>repmgr node check</command> with one of the individual
|
||||||
checks listed above, &repmgr; will emit one of the following Nagios-style exit codes
|
checks listed above, &repmgr; will emit one of the following Nagios-style exit codes
|
||||||
(even if <literal>--nagios</literal> is not supplied):
|
(even if <option>--nagios</option> is not supplied):
|
||||||
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,12 @@
|
|||||||
<programlisting>
|
<programlisting>
|
||||||
repmgr node rejoin -d '$conninfo'</programlisting>
|
repmgr node rejoin -d '$conninfo'</programlisting>
|
||||||
|
|
||||||
where <literal>$conninfo</literal> is the conninfo string of any reachable node in the cluster.
|
where <literal>$conninfo</literal> is the PostgreSQL <literal>conninfo</literal> string of the
|
||||||
|
<emphasis>current</emphasis> primary node (or that of any reachable node in the cluster, but
|
||||||
|
<emphasis>not</emphasis> the local node). This is so that &repmgr; can fetch up-to-date information
|
||||||
|
about the current state of the cluster.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
<filename>repmgr.conf</filename> for the stopped node <emphasis>must</emphasis> be supplied explicitly if not
|
<filename>repmgr.conf</filename> for the stopped node <emphasis>must</emphasis> be supplied explicitly if not
|
||||||
otherwise available.
|
otherwise available.
|
||||||
</para>
|
</para>
|
||||||
@@ -283,7 +288,15 @@
|
|||||||
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
to execute <command>pg_rewind</command> to ensure the node can be rejoined successfully.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<important>
|
<refsect2 id="repmgr-node-rejoin-pg-rewind-config-files" xreflabel="pg_rewind and configuration files">
|
||||||
|
|
||||||
|
<title><command>pg_rewind</command> and configuration file retention</title>
|
||||||
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pg_rewind</primary>
|
||||||
|
<secondary>configuration file retention</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
Be aware that if <command>pg_rewind</command> is executed and actually performs a
|
||||||
rewind operation, any configuration files in the PostgreSQL data directory will be
|
rewind operation, any configuration files in the PostgreSQL data directory will be
|
||||||
@@ -291,17 +304,27 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
To prevent this happening, provide a comma-separated list of files to retain
|
To prevent this happening, provide a comma-separated list of files to retain
|
||||||
using the <literal>--config-file</literal> command line option; the specified files
|
using the <option>--config-files</option> command line option; the specified files
|
||||||
will be archived in a temporary directory (whose parent directory can be specified with
|
will be archived in a temporary directory (whose parent directory can be specified with
|
||||||
<literal>--config-archive-dir</literal>) and restored once the rewind operation is
|
<option>--config-archive-dir</option>, default: <filename>/tmp</filename>)
|
||||||
complete.
|
and restored once the rewind operation is complete.
|
||||||
</para>
|
</para>
|
||||||
</important>
|
</refsect2>
|
||||||
|
|
||||||
<para>
|
<refsect2 id="repmgr-node-rejoin-pg-rewind-example" xreflabel="example using repmgr node rejoin and pg_rewind">
|
||||||
Example, first using <literal>--dry-run</literal>, then actually executing the
|
|
||||||
<literal>node rejoin command</literal>.
|
<title>Example using <command>repmgr node rejoin</command> and <command>pg_rewind</command></title>
|
||||||
<programlisting>
|
|
||||||
|
<indexterm>
|
||||||
|
<primary>pg_rewind</primary>
|
||||||
|
<secondary>configuration file retention</secondary>
|
||||||
|
</indexterm>
|
||||||
|
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Example, first using <option>--dry-run</option>, then actually executing the
|
||||||
|
<command>repmgr node rejoin</command> command.
|
||||||
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind --dry-run
|
||||||
INFO: replication connection to the rejoin target node was successful
|
INFO: replication connection to the rejoin target node was successful
|
||||||
@@ -317,17 +340,17 @@
|
|||||||
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
pg_rewind -D '/var/lib/postgresql/data' --source-server='host=node3 dbname=repmgr user=repmgr'
|
||||||
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
INFO: prerequisites for executing NODE REJOIN are met</programlisting>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
If <option>--force-rewind</option> is used with the <option>--dry-run</option> option,
|
||||||
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
this checks the prerequisites for using <application>pg_rewind</application>, but is
|
||||||
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
not an absolute guarantee that actually executing <application>pg_rewind</application>
|
||||||
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"/> below.
|
will succeed. See also section <xref linkend="repmgr-node-rejoin-caveats"/> below.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
$ repmgr node rejoin -f /etc/repmgr.conf -d 'host=node3 dbname=repmgr user=repmgr' \
|
||||||
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
--config-files=postgresql.local.conf,postgresql.conf --verbose --force-rewind
|
||||||
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 3
|
||||||
@@ -339,8 +362,8 @@
|
|||||||
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
NOTICE: starting server using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/pgsql/data' start"
|
||||||
NOTICE: NODE REJOIN successful
|
NOTICE: NODE REJOIN successful
|
||||||
DETAIL: node 2 is now attached to node 3</programlisting>
|
DETAIL: node 2 is now attached to node 3</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
</refsect2>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
<refsect1 id="repmgr-node-rejoin-caveats" xreflabel="Caveats">
|
||||||
@@ -378,6 +401,51 @@
|
|||||||
is running in <option>--dry-run</option> mode.
|
is running in <option>--dry-run</option> mode.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<warning>
|
||||||
|
<para>
|
||||||
|
In all current PostgreSQL versions (as of September 2020), <application>pg_rewind</application>
|
||||||
|
contains a corner-case bug which affects standbys in a very specific situation.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This situation occurs when a standby was shut down <emphasis>before</emphasis> its
|
||||||
|
primary node, and an attempt is made to attach this standby to another primary
|
||||||
|
in the same cluster (following a "split brain" situation where the standby
|
||||||
|
was connected to the wrong primary). In this case, &repmgr; will correctly determine
|
||||||
|
that <application>pg_rewind</application> should be executed, however
|
||||||
|
<application>pg_rewind</application> incorrectly decides that no action is necessary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
In this situation, &repmgr; will report something like:
|
||||||
|
<programlisting>
|
||||||
|
NOTICE: pg_rewind execution required for this node to attach to rejoin target node 1
|
||||||
|
DETAIL: rejoin target server's timeline 3 forked off current database system timeline 2 before current recovery point 0/7019C10</programlisting>
|
||||||
|
but when executed, <application>pg_rewind</application> will report:
|
||||||
|
<programlisting>
|
||||||
|
pg_rewind: servers diverged at WAL location 0/7015540 on timeline 2
|
||||||
|
pg_rewind: no rewind required</programlisting>
|
||||||
|
and if an attempt is made to attach the standby to the new primary, PostgreSQL logs on the standby
|
||||||
|
will contain errors like:
|
||||||
|
<programlisting>
|
||||||
|
[2020-09-07 15:01:41 UTC] LOG: 00000: replication terminated by primary server
|
||||||
|
[2020-09-07 15:01:41 UTC] DETAIL: End of WAL reached on timeline 2 at 0/7015540.
|
||||||
|
[2020-09-07 15:01:41 UTC] LOG: 00000: new timeline 3 forked off current database system timeline 2 before current recovery point 0/7019C10</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Currently it is not possible to resolve this situation using <application>pg_rewind</application>.
|
||||||
|
A <ulink url="https://www.postgresql.org/message-id/flat/CABvVfJU-LDWvoz4-Yow3Ay5LZYTuPD7eSjjE4kGyNZpXC6FrVQ@mail.gmail.com">patch</ulink>
|
||||||
|
has been submitted and will hopefully be included in a forthcoming PostgreSQL minor release.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
As a workaround, start the primary server the standby was previously attached to,
|
||||||
|
and ensure the standby can be attached to it. If <application>pg_rewind</application> was actually executed,
|
||||||
|
it will have copied in the <filename>.history</filename> file from the target primary server; this must
|
||||||
|
be removed. <command>repmgr node rejoin</command> can then be used to attach the standby to the original
|
||||||
|
primary. Ensure any changes pending on the primary have propagated to the standby. Then shut down the primary
|
||||||
|
server <emphasis>first</emphasis>, before shutting down the standby. It should then be possible to
|
||||||
|
use <command>repmgr node rejoin</command> to attach the standby to the new primary.
|
||||||
|
</para>
|
||||||
|
</warning>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
|
|||||||
@@ -75,8 +75,22 @@
|
|||||||
<para>
|
<para>
|
||||||
Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
|
Issue a <command>CHECKPOINT</command> before stopping or restarting the node.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that a superuser connection is required to be able to execute the
|
||||||
|
<command>CHECKPOINT</command> command.
|
||||||
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-S</option>/<option>--superuser</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Connect as the named superuser instead of the normal &repmgr; user.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|||||||
@@ -24,10 +24,15 @@
|
|||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
It's possibly to install the &repmgr; extension manually before executing
|
&repmgr; will attempt to install the &repmgr; extension as part of this command,
|
||||||
|
however this will fail if the <literal>repmgr</literal> user is not a superuser.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
It's possible to install the &repmgr; extension manually before executing
|
||||||
<command>repmgr primary register</command>; in this case &repmgr; will
|
<command>repmgr primary register</command>; in this case &repmgr; will
|
||||||
detect the presence of the extension and skip that step.
|
detect the presence of the extension and skip that step.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
@@ -59,6 +64,21 @@
|
|||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>User permission requirements</title>
|
||||||
|
<para>
|
||||||
|
The <literal>repmgr</literal> user must be a superuser in order for &repmgr;
|
||||||
|
to be able to install the <literal>repmgr</literal> extension.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If this is not the case, the <literal>repmgr</literal> extension can be installed
|
||||||
|
manually before executing <command>repmgr primary register</command>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
A future &repmgr; release will enable the provision of a <option>--superuser</option>
|
||||||
|
name for the installation of the extension.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
<refsect1>
|
<refsect1>
|
||||||
|
|
||||||
<title>Options</title>
|
<title>Options</title>
|
||||||
|
|||||||
@@ -188,18 +188,18 @@
|
|||||||
</note>
|
</note>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
<!-- don't rename this id as it may be used in external links -->
|
||||||
<refsect1 id="repmgr-standby-create-recovery-conf">
|
<refsect1 id="repmgr-standby-create-recovery-conf">
|
||||||
|
|
||||||
<title>Using a standby cloned by another method</title>
|
<title>Using a standby cloned by another method</title>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>recovery.conf</primary>
|
<primary>replication configuration</primary>
|
||||||
<secondary>generating for a standby cloned by another method</secondary>
|
<secondary>generating for a standby cloned by another method</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<indexterm>
|
<indexterm>
|
||||||
<primary>replication configuration</primary>
|
<primary>recovery.conf</primary>
|
||||||
<secondary>generating for a standby cloned by another method</secondary>
|
<secondary>generating for a standby cloned by another method</secondary>
|
||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
@@ -224,7 +224,7 @@
|
|||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
<para>
|
<para>
|
||||||
Then execute the command <command>repmgr standby clone --recovery-conf-only</command>.
|
Then execute the command <command>repmgr standby clone --replication-conf-only</command>.
|
||||||
This will create the <filename>recovery.conf</filename> file needed to attach
|
This will create the <filename>recovery.conf</filename> file needed to attach
|
||||||
the node to its upstream (in PostgreSQL 12 and later: append replication configuration
|
the node to its upstream (in PostgreSQL 12 and later: append replication configuration
|
||||||
to <filename>postgresql.auto.conf</filename>), and will also create a replication slot on the
|
to <filename>postgresql.auto.conf</filename>), and will also create a replication slot on the
|
||||||
@@ -236,7 +236,7 @@
|
|||||||
<option>-F/--force</option> option is provided.
|
<option>-F/--force</option> option is provided.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Execute <command>repmgr standby clone --recovery-conf-only --dry-run</command>
|
Execute <command>repmgr standby clone --replication-conf-only --dry-run</command>
|
||||||
to check the prerequisites for creating the recovery configuration,
|
to check the prerequisites for creating the recovery configuration,
|
||||||
and display the contents of the configuration which would be added without actually
|
and display the contents of the configuration which would be added without actually
|
||||||
making any changes.
|
making any changes.
|
||||||
@@ -266,7 +266,7 @@
|
|||||||
Check prerequisites but don't actually clone the standby.
|
Check prerequisites but don't actually clone the standby.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If <option>--recovery-conf-only</option> specified, the contents of
|
If <option>--replication-conf-only</option> is specified, the contents of
|
||||||
the generated recovery configuration will be displayed
|
the generated recovery configuration will be displayed
|
||||||
but not written.
|
but not written.
|
||||||
</para>
|
</para>
|
||||||
@@ -312,7 +312,7 @@
|
|||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option> --recovery-conf-only</option></term>
|
<term><option>--replication-conf-only</option></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Create recovery configuration for a previously cloned instance.
|
Create recovery configuration for a previously cloned instance.
|
||||||
|
|||||||
@@ -86,6 +86,7 @@
|
|||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Example</title>
|
<title>Example</title>
|
||||||
<para>
|
<para>
|
||||||
@@ -94,13 +95,46 @@
|
|||||||
NOTICE: promoting standby to primary
|
NOTICE: promoting standby to primary
|
||||||
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
|
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl -l /var/log/postgres/startup.log -w -D '/var/lib/postgres/data' promote"
|
||||||
server promoting
|
server promoting
|
||||||
DEBUG: setting node 2 as primary and marking existing primary as failed
|
|
||||||
NOTICE: STANDBY PROMOTE successful
|
NOTICE: STANDBY PROMOTE successful
|
||||||
DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
DETAIL: server "node2" (ID: 2) was successfully promoted to primary</programlisting>
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
<title>User permission requirements</title>
|
||||||
|
<para><emphasis>pg_promote() (PostgreSQL 12 and later)</emphasis></para>
|
||||||
|
<para>
|
||||||
|
From PostgreSQL 12, &repmgr; will attempt to use the built-in <function>pg_promote()</function>
|
||||||
|
function to promote a standby to primary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
By default, execution of <function>pg_promote()</function> is restricted to superusers.
|
||||||
|
If the <literal>repmgr</literal> user does not have permission to execute
|
||||||
|
<function>pg_promote()</function>, &repmgr; will fall back to using "<command>pg_ctl promote</command>".
|
||||||
|
</para>
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
Execute <command>repmgr standby promote</command> with the <option>--dry-run</option> option
|
||||||
|
to check whether the &repmgr; user has permission to execute <function>pg_promote()</function>.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If the <literal>repmgr</literal> user is not a superuser, execution permission for this
|
||||||
|
function can be granted with e.g.:
|
||||||
|
<programlisting>
|
||||||
|
GRANT EXECUTE ON FUNCTION pg_catalog.pg_promote TO repmgr</programlisting>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that permissions are only effective for the database they are granted in, so
|
||||||
|
this <emphasis>must</emphasis> be executed in the &repmgr; database to be effective.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
<para>
|
||||||
|
For more details on <function>pg_promote()</function>, see the
|
||||||
|
<ulink url="https://www.postgresql.org/docs/current/functions-admin.html#FUNCTIONS-RECOVERY-CONTROL-TABLE">PostgreSQL documentation</ulink>.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
<title>Options</title>
|
<title>Options</title>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
@@ -135,6 +169,42 @@
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F</option></term>
|
||||||
|
<term><option>--force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Ignore warnings and continue anyway.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
This option is relevant in the following situations if <option>--siblings-follow</option> was specified:
|
||||||
|
<itemizedlist spacing="compact" mark="bullet">
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If one or more sibling nodes was not reachable via SSH, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If the promotion candidate has insufficient free walsenders to accommodate the standbys which will
|
||||||
|
be attached to it, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<simpara>
|
||||||
|
If replication slots are in use but the promotion candidate has insufficient free replication slots
|
||||||
|
to accommodate the standbys which will be attached to it, the standby will be promoted anyway.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that if the <option>-F</option>/<option>--force</option> option is used when any of the above
|
||||||
|
situations is encountered, the onus is on the user to manually resolve any resulting issues.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
@@ -172,6 +242,23 @@
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
|
<listitem>
|
||||||
|
<indexterm>
|
||||||
|
<primary>service_promote_command</primary>
|
||||||
|
<secondary>with "repmgr standby promote"</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<simpara>
|
||||||
|
<literal>service_promote_command</literal>:
|
||||||
|
a command which will be executed instead of <command>pg_ctl promote</command>
|
||||||
|
or (in PostgreSQL 12 and later) <function>pg_promote()</function>.
|
||||||
|
</simpara>
|
||||||
|
<simpara>
|
||||||
|
This is intended for systems which provide a package-level promote command,
|
||||||
|
such as Debian's <application>pg_ctlcluster</application>, to promote the
|
||||||
|
PostgreSQL instance from standby to primary.
|
||||||
|
</simpara>
|
||||||
|
</listitem>
|
||||||
|
|
||||||
</itemizedlist>
|
</itemizedlist>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|||||||
@@ -111,7 +111,7 @@
|
|||||||
<command><ulink url="https://docs.pgbarman.org/#recover">barman recover</ulink></command>
|
<command><ulink url="https://docs.pgbarman.org/#recover">barman recover</ulink></command>
|
||||||
command), register the node as detailed in section
|
command), register the node as detailed in section
|
||||||
<xref linkend="repmgr-standby-register-inactive-node"/> then execute
|
<xref linkend="repmgr-standby-register-inactive-node"/> then execute
|
||||||
<link linkend="repmgr-standby-create-recovery-conf">repmgr standby clone --recovery-conf-only</link>
|
<link linkend="repmgr-standby-create-recovery-conf">repmgr standby clone --replication-conf-only</link>
|
||||||
to generate the appropriate replication configuration.
|
to generate the appropriate replication configuration.
|
||||||
</para>
|
</para>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|||||||
@@ -63,6 +63,38 @@
|
|||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1>
|
<refsect1>
|
||||||
|
<title>User permission requirements</title>
|
||||||
|
<para><emphasis>CHECKPOINT</emphasis></para>
|
||||||
|
<para>
|
||||||
|
&repmgr; executes <command>CHECKPOINT</command> on the demotion candidate as part of the shutdown
|
||||||
|
process to ensure it shuts down as smoothly as possible.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that <command>CHECKPOINT</command> requires database superuser permissions to execute.
|
||||||
|
If the <literal>repmgr</literal> user is not a superuser, the name of a superuser should be
|
||||||
|
provided with the <option>-S</option>/<option>--superuser</option> option.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
If &repmgr; is unable to execute the <command>CHECKPOINT</command> command, the switchover
|
||||||
|
can still be carried out, albeit at a greater risk that the demotion candidate may not
|
||||||
|
be able to shut down as smoothly as might otherwise have been the case.
|
||||||
|
</para>
|
||||||
|
<para><emphasis>pg_promote() (PostgreSQL 12 and later)</emphasis></para>
|
||||||
|
<para>
|
||||||
|
From PostgreSQL 12, &repmgr; defaults to using the built-in <function>pg_promote()</function> function to
|
||||||
|
promote a standby to primary.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Note that execution of <function>pg_promote()</function> is restricted to superusers or to
|
||||||
|
any user who has been granted execution permission for this function. If the &repmgr; user
|
||||||
|
is not permitted to execute <function>pg_promote()</function>, &repmgr; will fall back to using
|
||||||
|
"<command>pg_ctl promote</command>". For more details see
|
||||||
|
<link linkend="repmgr-standby-promote">repmgr standby promote</link>.
|
||||||
|
</para>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
<title>Options</title>
|
<title>Options</title>
|
||||||
<variablelist>
|
<variablelist>
|
||||||
|
|
||||||
@@ -184,6 +216,17 @@
|
|||||||
</note>
|
</note>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-S</option>/<option>--superuser</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Use the named superuser instead of the normal &repmgr; user to perform
|
||||||
|
actions requiring superuser permissions.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
</variablelist>
|
</variablelist>
|
||||||
|
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|||||||
@@ -63,6 +63,34 @@
|
|||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<refsect1>
|
||||||
|
|
||||||
|
<title>Options</title>
|
||||||
|
<variablelist>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--dry-run</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Check prerequisites but don't actually register the witness.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>-F</option>/<option>--force</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Overwrite an existing node record.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
</variablelist>
|
||||||
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="repmgr-witness-register-events">
|
<refsect1 id="repmgr-witness-register-events">
|
||||||
<title>Event notifications</title>
|
<title>Event notifications</title>
|
||||||
<para>
|
<para>
|
||||||
|
|||||||
@@ -91,7 +91,6 @@
|
|||||||
&repmgrd-automatic-failover;
|
&repmgrd-automatic-failover;
|
||||||
&repmgrd-configuration;
|
&repmgrd-configuration;
|
||||||
&repmgrd-operation;
|
&repmgrd-operation;
|
||||||
&repmgrd-bdr;
|
|
||||||
</part>
|
</part>
|
||||||
|
|
||||||
<part id="repmgr-command-reference">
|
<part id="repmgr-command-reference">
|
||||||
|
|||||||
@@ -192,8 +192,8 @@
|
|||||||
connected. Beginning with <link linkend="release-4.4">&repmgr; 4.4</link>
|
connected. Beginning with <link linkend="release-4.4">&repmgr; 4.4</link>
|
||||||
it is now possible for the affected standbys to build a consensus about whether
|
it is now possible for the affected standbys to build a consensus about whether
|
||||||
the primary is still available to some standbys ("primary visibility consensus").
|
the primary is still available to some standbys ("primary visibility consensus").
|
||||||
This is done by polling each standby for the time it last saw the primary;
|
This is done by polling each standby (and the witness, if present) for the time it last saw the
|
||||||
if any have seen the primary very recently, it's reasonable
|
primary; if any have seen the primary very recently, it's reasonable
|
||||||
to infer that the primary is still available and a failover should not be started.
|
to infer that the primary is still available and a failover should not be started.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
@@ -205,9 +205,10 @@
|
|||||||
<programlisting>$ repmgr -f /etc/repmgr.conf service status
|
<programlisting>$ repmgr -f /etc/repmgr.conf service status
|
||||||
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
|
ID | Name | Role | Status | Upstream | repmgrd | PID | Paused? | Upstream last seen
|
||||||
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
|
----+-------+---------+-----------+----------+---------+-------+---------+--------------------
|
||||||
1 | node1 | primary | * running | | running | 96563 | no | n/a
|
1 | node1 | primary | * running | | running | 27259 | no | n/a
|
||||||
2 | node2 | standby | running | node1 | running | 96572 | no | 1 second(s) ago
|
2 | node2 | standby | running | node1 | running | 27272 | no | 1 second(s) ago
|
||||||
3 | node3 | standby | running | node1 | running | 96584 | no | 0 second(s) ago</programlisting>
|
3 | node3 | standby | running | node1 | running | 27282 | no | 0 second(s) ago
|
||||||
|
4 | node4 | witness | * running | node1 | running | 27298 | no | 1 second(s) ago</programlisting>
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|||||||
@@ -1,429 +0,0 @@
|
|||||||
<chapter id="repmgrd-bdr">
|
|
||||||
<title>BDR failover with repmgrd</title>
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>repmgrd</primary>
|
|
||||||
<secondary>BDR</secondary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<indexterm>
|
|
||||||
<primary>BDR</primary>
|
|
||||||
</indexterm>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
&repmgr; 4.x provides support for monitoring a pair of BDR 2.x nodes and taking action in
|
|
||||||
case one of the nodes fails.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
Due to the nature of BDR 1.x/2.x, it's only safe to use this solution for
|
|
||||||
a two-node scenario. Introducing additional nodes will create an inherent
|
|
||||||
risk of node desynchronisation if a node goes down without being cleanly
|
|
||||||
removed from the cluster.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
In contrast to streaming replication, there's no concept of "promoting" a new
|
|
||||||
primary node with BDR. Instead, "failover" involves monitoring both nodes
|
|
||||||
with &repmgrd; and redirecting queries from the failed node to the remaining
|
|
||||||
active node. This can be done by using an
|
|
||||||
<link linkend="event-notifications">event notification</link> script
|
|
||||||
which is called by &repmgrd; to dynamically
|
|
||||||
reconfigure a proxy server/connection pooler such as <application>PgBouncer</application>.
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
|
||||||
It is <emphasis>not</emphasis> required for later BDR versions.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<sect1 id="bdr-prerequisites" xreflabel="BDR prerequisites">
|
|
||||||
<title>Prerequisites</title>
|
|
||||||
<important>
|
|
||||||
<para>
|
|
||||||
This &repmgr; functionality is for BDR 2.x only running on PostgreSQL 9.4/9.6.
|
|
||||||
It is <emphasis>not</emphasis> required for later BDR versions.
|
|
||||||
</para>
|
|
||||||
</important>
|
|
||||||
<para>
|
|
||||||
&repmgr; 4 requires PostgreSQL 9.4 or 9.6 with the BDR 2 extension
|
|
||||||
enabled and configured for a two-node BDR network. &repmgr; 4 packages
|
|
||||||
must be installed on each node before attempting to configure
|
|
||||||
<application>repmgr</application>.
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
&repmgr; 4 will refuse to install if it detects more than two BDR nodes.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
<para>
|
|
||||||
Application database connections <emphasis>must</emphasis> be passed through a proxy server/
|
|
||||||
connection pooler such as <application>PgBouncer</application>, and it must be possible to dynamically
|
|
||||||
reconfigure that from &repmgrd;. The example demonstrated in this document
|
|
||||||
will use <application>PgBouncer</application>.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The proxy server / connection poolers must <emphasis>not</emphasis>
|
|
||||||
be installed on the database servers.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
For this example, it's assumed password-less SSH connections are available
|
|
||||||
from the PostgreSQL servers to the servers where <application>PgBouncer</application>
|
|
||||||
runs, and that the user on those servers has permission to alter the
|
|
||||||
<application>PgBouncer</application> configuration files.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
PostgreSQL connections must be possible between each node, and each node
|
|
||||||
must be able to connect to each PgBouncer instance.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="bdr-configuration" xreflabel="BDR configuration">
|
|
||||||
<title>Configuration</title>
|
|
||||||
<para>
|
|
||||||
A sample configuration for <filename>repmgr.conf</filename> on each
|
|
||||||
BDR node would look like this:
|
|
||||||
<programlisting>
|
|
||||||
# Node information
|
|
||||||
node_id=1
|
|
||||||
node_name='node1'
|
|
||||||
conninfo='host=node1 dbname=bdrtest user=repmgr connect_timeout=2'
|
|
||||||
data_directory='/var/lib/postgresql/data'
|
|
||||||
replication_type='bdr'
|
|
||||||
|
|
||||||
# Event notification configuration
|
|
||||||
event_notifications=bdr_failover
|
|
||||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
|
|
||||||
|
|
||||||
# repmgrd options
|
|
||||||
monitor_interval_secs=5
|
|
||||||
reconnect_attempts=6
|
|
||||||
reconnect_interval=5</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Adjust settings as appropriate; copy and adjust for the second node (particularly
|
|
||||||
the values <varname>node_id</varname>, <varname>node_name</varname>
|
|
||||||
and <varname>conninfo</varname>).
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Note that the values provided for the <varname>conninfo</varname> string
|
|
||||||
must be valid for connections from <emphasis>both</emphasis> nodes in the
|
|
||||||
replication cluster. The database must be the BDR-enabled database.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
If defined, the <varname>event_notifications</varname> parameter will restrict
|
|
||||||
execution of the script defined in <varname>event_notification_command</varname>
|
|
||||||
to the specified event(s).
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<simpara>
|
|
||||||
<varname>event_notification_command</varname> is the script which does the actual "heavy lifting"
|
|
||||||
of reconfiguring the proxy server/ connection pooler. It is fully
|
|
||||||
user-definable; see section <xref linkend="bdr-event-notification-command"/> for a reference
|
|
||||||
implementation.
|
|
||||||
</simpara>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="bdr-repmgr-setup" xreflabel="repmgr setup with BDR">
|
|
||||||
<title>repmgr setup</title>
|
|
||||||
<para>
|
|
||||||
Register both nodes; example on <literal>node1</literal>:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf bdr register
|
|
||||||
NOTICE: attempting to install extension "repmgr"
|
|
||||||
NOTICE: "repmgr" extension successfully installed
|
|
||||||
NOTICE: node record created for node 'node1' (ID: 1)
|
|
||||||
NOTICE: BDR node 1 registered (conninfo: host=node1 dbname=bdrtest user=repmgr)</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
and on <literal>node2</literal>:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf bdr register
|
|
||||||
NOTICE: node record created for node 'node2' (ID: 2)
|
|
||||||
NOTICE: BDR node 2 registered (conninfo: host=node2 dbname=bdrtest user=repmgr)</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The <literal>repmgr</literal> extension will be automatically created
|
|
||||||
when the first node is registered, and will be propagated to the second
|
|
||||||
node.
|
|
||||||
</para>
|
|
||||||
<important>
|
|
||||||
<simpara>
|
|
||||||
Ensure the &repmgr; package is available on both nodes before
|
|
||||||
attempting to register the first node.
|
|
||||||
</simpara>
|
|
||||||
</important>
|
|
||||||
<para>
|
|
||||||
At this point the meta data for both nodes has been created; executing
|
|
||||||
<xref linkend="repmgr-cluster-show"/> (on either node) should produce output like this:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster show
|
|
||||||
ID | Name | Role | Status | Upstream | Location | Connection string
|
|
||||||
----+-------+------+-----------+----------+--------------------------------------------------------
|
|
||||||
1 | node1 | bdr | * running | | default | host=node1 dbname=bdrtest user=repmgr connect_timeout=2
|
|
||||||
2 | node2 | bdr | * running | | default | host=node2 dbname=bdrtest user=repmgr connect_timeout=2</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Additionally it's possible to display a log of significant events; executing
|
|
||||||
<xref linkend="repmgr-cluster-event"/> (on either node) should produce output like this:
|
|
||||||
<programlisting>
|
|
||||||
$ repmgr -f /etc/repmgr.conf cluster event
|
|
||||||
Node ID | Event | OK | Timestamp | Details
|
|
||||||
---------+--------------+----+---------------------+----------------------------------------------
|
|
||||||
2 | bdr_register | t | 2017-07-27 17:51:48 | node record created for node 'node2' (ID: 2)
|
|
||||||
1 | bdr_register | t | 2017-07-27 17:51:00 | node record created for node 'node1' (ID: 1)
|
|
||||||
</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
At this point there will only be records for the two node registrations (displayed here
|
|
||||||
in reverse chronological order).
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="bdr-event-notification-command" xreflabel="Defining the BDR failover &quot;event_notification_command&quot;">
|
|
||||||
<title>Defining the BDR failover "event_notification_command"</title>
|
|
||||||
<para>
|
|
||||||
Key to "failover" execution is the <literal>event_notification_command</literal>,
|
|
||||||
which is a user-definable script specified in <filename>repmgr.conf</filename>
|
|
||||||
and which can use a &repmgr; <link linkend="event-notifications">event notification</link>
|
|
||||||
to reconfigure the proxy server / connection pooler so it points to the other, still-active node.
|
|
||||||
Details of the event will be passed as parameters to the script.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The following parameter placeholders are available for the script definition in <filename>repmgr.conf</filename>;
|
|
||||||
these will be replaced with the appropriate value when the script is executed:
|
|
||||||
</para>
|
|
||||||
|
|
||||||
<variablelist>
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%n</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
node ID
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%e</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
event type
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%s</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
success (1 or 0)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%t</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
timestamp
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%d</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
details
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%c</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
conninfo string of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
<varlistentry>
|
|
||||||
<term><option>%a</option></term>
|
|
||||||
<listitem>
|
|
||||||
<para>
|
|
||||||
name of the next available node (<varname>bdr_failover</varname> and <varname>bdr_recovery</varname>)
|
|
||||||
</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
</variablelist>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
Note that <literal>%c</literal> and <literal>%a</literal> are only provided with
|
|
||||||
particular failover events, in this case <varname>bdr_failover</varname> and <varname>bdr_recovery</varname>.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
The provided sample script
|
|
||||||
(<literal><ulink url="https://raw.githubusercontent.com/2ndQuadrant/repmgr/master/scripts/bdr-pgbouncer.sh">scripts/bdr-pgbouncer.sh</ulink></literal>)
|
|
||||||
is configured as follows:
|
|
||||||
<programlisting>
|
|
||||||
event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a"'</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
and parses the placeholder parameters like this:
|
|
||||||
<programlisting>
|
|
||||||
NODE_ID=$1
|
|
||||||
EVENT_TYPE=$2
|
|
||||||
SUCCESS=$3
|
|
||||||
NEXT_CONNINFO=$4
|
|
||||||
NEXT_NODE_NAME=$5</programlisting>
|
|
||||||
</para>
|
|
||||||
<note>
|
|
||||||
<para>
|
|
||||||
The sample script also contains some hard-coded values for the <application>PgBouncer</application>
|
|
||||||
configuration for both nodes; these will need to be adjusted for your local environment
|
|
||||||
(ideally the scripts would be maintained as templates and generated by some
|
|
||||||
kind of provisioning system).
|
|
||||||
</para>
|
|
||||||
</note>
|
|
||||||
|
|
||||||
<para>
|
|
||||||
The script performs the following steps:
|
|
||||||
<itemizedlist spacing="compact" mark="bullet">
|
|
||||||
<listitem>
|
|
||||||
<simpara>pauses <application>PgBouncer</application> on all nodes</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>recreates the <application>PgBouncer</application> configuration file on each
|
|
||||||
node using the information provided by &repmgrd;
|
|
||||||
(primarily the <varname>conninfo</varname> string) to configure
|
|
||||||
<application>PgBouncer</application></simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>reloads the <application>PgBouncer</application> configuration</simpara>
|
|
||||||
</listitem>
|
|
||||||
<listitem>
|
|
||||||
<simpara>executes the <command>RESUME</command> command (in <application>PgBouncer</application>)</simpara>
|
|
||||||
</listitem>
|
|
||||||
</itemizedlist>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Following successful script execution, any connections to PgBouncer on the failed BDR node
|
|
||||||
will be redirected to the active node.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="bdr-monitoring-failover" xreflabel="Node monitoring and failover">
|
|
||||||
<title>Node monitoring and failover</title>
|
|
||||||
<para>
|
|
||||||
At the intervals specified by <varname>monitor_interval_secs</varname>
|
|
||||||
in <filename>repmgr.conf</filename>, &repmgrd;
|
|
||||||
will ping each node to check if it's available. If a node isn't available,
|
|
||||||
&repmgrd; will enter failover mode and check <varname>reconnect_attempts</varname>
|
|
||||||
times at intervals of <varname>reconnect_interval</varname> to confirm the node is definitely unreachable.
|
|
||||||
This buffer period is necessary to avoid false positives caused by transient
|
|
||||||
network outages.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
If the node is still unavailable, &repmgrd; will enter failover mode and execute
|
|
||||||
the script defined in <varname>event_notification_command</varname>; an entry will be logged
|
|
||||||
in the <literal>repmgr.events</literal> table and &repmgrd; will
|
|
||||||
(unless otherwise configured) resume monitoring of the node in "degraded" mode until it reappears.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
&repmgrd; logfile output during a failover event will look something like this
|
|
||||||
on one node (usually the node which has failed, here <literal>node2</literal>):
|
|
||||||
<programlisting>
|
|
||||||
...
|
|
||||||
[2017-07-27 21:08:39] [INFO] starting continuous BDR node monitoring
|
|
||||||
[2017-07-27 21:08:39] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
|
||||||
[2017-07-27 21:08:55] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
|
||||||
[2017-07-27 21:09:11] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
|
||||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
|
||||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
|
||||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
|
||||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
|
||||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
|
||||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
|
||||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
|
||||||
[2017-07-27 21:09:28] [NOTICE] setting node record for node 2 to inactive
|
|
||||||
[2017-07-27 21:09:28] [INFO] executing notification command for event "bdr_failover"
|
|
||||||
[2017-07-27 21:09:28] [DETAIL] command is:
|
|
||||||
/path/to/bdr-pgbouncer.sh 2 bdr_failover 1 "host=node1 dbname=bdrtest user=repmgr connect_timeout=2" "node1"
|
|
||||||
[2017-07-27 21:09:28] [INFO] node 'node2' (ID: 2) detected as failed; next available node is 'node1' (ID: 1)
|
|
||||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
|
||||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
|
||||||
...</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Output on the other node (<literal>node1</literal>) during the same event will look like this:
|
|
||||||
<programlisting>
|
|
||||||
...
|
|
||||||
[2017-07-27 21:08:35] [INFO] starting continuous BDR node monitoring
|
|
||||||
[2017-07-27 21:08:35] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
|
||||||
[2017-07-27 21:08:51] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
|
||||||
[2017-07-27 21:09:07] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
|
||||||
[2017-07-27 21:09:23] [WARNING] unable to connect to node node2 (ID 2)
|
|
||||||
[2017-07-27 21:09:23] [INFO] checking state of node 2, 0 of 5 attempts
|
|
||||||
[2017-07-27 21:09:23] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:24] [INFO] checking state of node 2, 1 of 5 attempts
|
|
||||||
[2017-07-27 21:09:24] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:25] [INFO] checking state of node 2, 2 of 5 attempts
|
|
||||||
[2017-07-27 21:09:25] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:26] [INFO] checking state of node 2, 3 of 5 attempts
|
|
||||||
[2017-07-27 21:09:26] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:27] [INFO] checking state of node 2, 4 of 5 attempts
|
|
||||||
[2017-07-27 21:09:27] [INFO] sleeping 1 seconds until next reconnection attempt
|
|
||||||
[2017-07-27 21:09:28] [WARNING] unable to reconnect to node 2 after 5 attempts
|
|
||||||
[2017-07-27 21:09:28] [NOTICE] other node's repmgrd is handling failover
|
|
||||||
[2017-07-27 21:09:28] [INFO] monitoring BDR replication status on node "node1" (ID: 1)
|
|
||||||
[2017-07-27 21:09:28] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
|
||||||
...</programlisting>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This assumes only the PostgreSQL instance on <literal>node2</literal> has failed. In this case the
|
|
||||||
&repmgrd; instance running on <literal>node2</literal> has performed the failover. However, if
|
|
||||||
the entire server becomes unavailable, &repmgrd; on <literal>node1</literal> will perform
|
|
||||||
the failover.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
<sect1 id="bdr-node-recovery" xreflabel="Node recovery">
|
|
||||||
<title>Node recovery</title>
|
|
||||||
<para>
|
|
||||||
Following failure of a BDR node, if the node subsequently becomes available again,
|
|
||||||
a <varname>bdr_recovery</varname> event will be generated. This could potentially be used to
|
|
||||||
reconfigure PgBouncer automatically to bring the node back into the available pool,
|
|
||||||
however it would be prudent to manually verify the node's status before
|
|
||||||
exposing it to the application.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
If the failed node comes back up and connects correctly, output similar to this
|
|
||||||
will be visible in the &repmgrd; log:
|
|
||||||
<programlisting>
|
|
||||||
[2017-07-27 21:25:30] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
|
||||||
[2017-07-27 21:25:46] [INFO] monitoring BDR replication status on node "node2" (ID: 2)
|
|
||||||
[2017-07-27 21:25:46] [DETAIL] monitoring node "node2" (ID: 2) in degraded mode
|
|
||||||
[2017-07-27 21:25:55] [INFO] active replication slot for node "node1" found after 1 seconds
|
|
||||||
[2017-07-27 21:25:55] [NOTICE] node "node2" (ID: 2) has recovered after 986 seconds</programlisting>
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
|
|
||||||
<sect1 id="bdr-complete-shutdown" xreflabel="Shutdown of both nodes">
|
|
||||||
<title>Shutdown of both nodes</title>
|
|
||||||
<para>
|
|
||||||
If both PostgreSQL instances are shut down, &repmgrd; will try to handle the
|
|
||||||
situation as gracefully as possible, though with no failover candidates available
|
|
||||||
there's not much it can do. Should this case ever occur, we recommend shutting
|
|
||||||
down &repmgrd; on both nodes and restarting it once the PostgreSQL instances
|
|
||||||
are running properly.
|
|
||||||
</para>
|
|
||||||
</sect1>
|
|
||||||
</chapter>
|
|
||||||
|
|
||||||
@@ -15,9 +15,13 @@
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
&repmgrd; can be configured to provide failover
|
&repmgrd; can be configured to provide failover
|
||||||
capability in case the primary upstream node becomes unreachable, and/or
|
capability in case the primary or upstream node becomes unreachable, and/or
|
||||||
provide monitoring data to the &repmgr; metadatabase.
|
provide monitoring data to the &repmgr; metadatabase.
|
||||||
</para>
|
</para>
|
||||||
|
<para>
|
||||||
|
From &repmgr; 4.4, when running on the primary node, &repmgrd; can also monitor
|
||||||
|
standby disconnections/reconnections (see <xref linkend="repmgrd-primary-child-disconnection"/>).
|
||||||
|
</para>
|
||||||
|
|
||||||
<sect1 id="repmgrd-basic-configuration">
|
<sect1 id="repmgrd-basic-configuration">
|
||||||
<title>repmgrd configuration</title>
|
<title>repmgrd configuration</title>
|
||||||
@@ -394,8 +398,8 @@
|
|||||||
</indexterm>
|
</indexterm>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
If <literal>true</literal>, only continue with failover if no standbys have seen
|
If <literal>true</literal>, only continue with failover if no standbys
|
||||||
the primary node recently.
|
(or the witness server, if present) have seen the primary node recently.
|
||||||
</para>
|
</para>
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
@@ -486,6 +490,23 @@
|
|||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
|
<sect2 id="repmgrd-automatic-failover-configuration-pgbouncer-fencing">
|
||||||
|
<title>Configuring &repmgrd; and PgBouncer to fence a failed primary node</title>
|
||||||
|
<indexterm>
|
||||||
|
<primary>fencing</primary>
|
||||||
|
<secondary>using repmgrd and pgbouncer to fence a failed primary node</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<indexterm>
|
||||||
|
<primary>PgBouncer</primary>
|
||||||
|
<secondary>using repmgrd and pgbouncer to fence a failed primary node</secondary>
|
||||||
|
</indexterm>
|
||||||
|
<para>
|
||||||
|
For further details and a reference implementation, see the separate document
|
||||||
|
<ulink url="https://github.com/2ndQuadrant/repmgr/blob/master/doc/repmgrd-node-fencing.md">Fencing a failed master node with repmgrd and PgBouncer</ulink>.
|
||||||
|
</para>
|
||||||
|
</sect2>
|
||||||
|
|
||||||
<sect2 id="postgresql-service-configuration">
|
<sect2 id="postgresql-service-configuration">
|
||||||
<title>PostgreSQL service configuration</title>
|
<title>PostgreSQL service configuration</title>
|
||||||
|
|
||||||
@@ -566,7 +587,8 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop'
|
|||||||
the option <option>monitor_interval_secs</option> (see above).
|
the option <option>monitor_interval_secs</option> (see above).
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
For more details on monitoring, see <xref linkend="repmgrd-monitoring"/>.
|
For more details on monitoring, see <xref linkend="repmgrd-monitoring"/>. For information on
|
||||||
|
monitoring standby disconnections, see <xref linkend="repmgrd-primary-child-disconnection"/>.
|
||||||
</para>
|
</para>
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
@@ -602,18 +624,6 @@ repmgrd_service_stop_command='sudo systemctl repmgr12 stop'
|
|||||||
</simpara>
|
</simpara>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<varname>bdr_local_monitoring_only</varname>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
|
||||||
<simpara>
|
|
||||||
<varname>bdr_recovery_timeout</varname>
|
|
||||||
</simpara>
|
|
||||||
</listitem>
|
|
||||||
|
|
||||||
<listitem>
|
<listitem>
|
||||||
<simpara>
|
<simpara>
|
||||||
<varname>child_nodes_check_interval</varname>
|
<varname>child_nodes_check_interval</varname>
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ The actual script is as follows; adjust the configurable items as appropriate:
|
|||||||
|
|
||||||
# 1. Promote this node from standby to primary
|
# 1. Promote this node from standby to primary
|
||||||
|
|
||||||
repmgr standby promote -f /etc/repmgr.conf
|
repmgr standby promote -f /etc/repmgr.conf --log-to-file
|
||||||
|
|
||||||
# 2. Reconfigure pgbouncer instances
|
# 2. Reconfigure pgbouncer instances
|
||||||
|
|
||||||
@@ -146,7 +146,7 @@ Script and template file should be installed on each node where `repmgrd` is run
|
|||||||
Finally, set `promote_command` in `repmgr.conf` on each node to
|
Finally, set `promote_command` in `repmgr.conf` on each node to
|
||||||
point to the custom promote script:
|
point to the custom promote script:
|
||||||
|
|
||||||
promote_command=/var/lib/postgres/repmgr/promote.sh
|
promote_command='/var/lib/postgres/repmgr/promote.sh'
|
||||||
|
|
||||||
and reload/restart any running `repmgrd` instances for the changes to take
|
and reload/restart any running `repmgrd` instances for the changes to take
|
||||||
effect.
|
effect.
|
||||||
|
|||||||
@@ -186,6 +186,7 @@
|
|||||||
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
|
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
|
||||||
INFO: following shutdown command would be run on node "node1":
|
INFO: following shutdown command would be run on node "node1":
|
||||||
"pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/postgresql/data' -m fast -W stop"
|
"pg_ctl -l /var/log/postgresql/startup.log -D '/var/lib/postgresql/data' -m fast -W stop"
|
||||||
|
INFO: parameter "shutdown_check_timeout" is set to 60 seconds
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
|||||||
@@ -201,9 +201,13 @@ ALTER EXTENSION repmgr UPDATE</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
<tip>
|
<tip>
|
||||||
<para>
|
<para>
|
||||||
If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
|
If the &repmgr; upgrade requires a PostgreSQL restart, combine the &repmgr; upgrade
|
||||||
with a PostgreSQL minor version upgrade, which will require a restart in any case.
|
with a PostgreSQL minor version upgrade, which will require a restart in any case.
|
||||||
New PostgreSQL minor version are usually released every couple of months.
|
</para>
|
||||||
|
<para>
|
||||||
|
New PostgreSQL minor versions are usually released every couple of months;
|
||||||
|
see the <ulink url="https://www.postgresql.org/developer/roadmap/">Roadmap</ulink>
|
||||||
|
for the current schedule.
|
||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
</sect2>
|
</sect2>
|
||||||
@@ -269,6 +273,29 @@ ALTER EXTENSION repmgr UPDATE</programlisting>
|
|||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
|
|
||||||
|
<sect2 id="upgrading-pg-upgrade-standby" xreflabel="pg_upgrade and upgrading standbys">
|
||||||
|
<title>Upgrading standbys with pg_upgrade and rsync</title>
|
||||||
|
<para>
|
||||||
|
If you are intending to upgrade a standby using the <command>rsync</command> method described
|
||||||
|
in the <ulink url="https://www.postgresql.org/docs/current/pgupgrade.html#PGUPGRADE-STEP-REPLICAS">pg_upgrade documentation</ulink>,
|
||||||
|
you <emphasis>must</emphasis> ensure the standby's replication configuration is present and correct
|
||||||
|
before starting the standby.
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Use <link linkend="repmgr-standby-clone">repmgr standby clone --replication-conf-only</link> to generate
|
||||||
|
the correct replication configuration.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
If upgrading from PostgreSQL 11 or earlier, be sure to delete <filename>recovery.conf</filename>, if present,
|
||||||
|
otherwise PostgreSQL will refuse to start.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* errcode.h
|
* errcode.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -32,18 +32,6 @@ SELECT * FROM repmgr.show_nodes;
|
|||||||
(0 rows)
|
(0 rows)
|
||||||
|
|
||||||
-- functions
|
-- functions
|
||||||
SELECT repmgr.am_bdr_failover_handler(-1);
|
|
||||||
am_bdr_failover_handler
|
|
||||||
-------------------------
|
|
||||||
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT repmgr.am_bdr_failover_handler(NULL);
|
|
||||||
am_bdr_failover_handler
|
|
||||||
-------------------------
|
|
||||||
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
SELECT repmgr.get_new_primary();
|
SELECT repmgr.get_new_primary();
|
||||||
get_new_primary
|
get_new_primary
|
||||||
-----------------
|
-----------------
|
||||||
@@ -92,9 +80,3 @@ SELECT repmgr.standby_set_last_updated();
|
|||||||
|
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
SELECT repmgr.unset_bdr_failover_handler();
|
|
||||||
unset_bdr_failover_handler
|
|
||||||
----------------------------
|
|
||||||
|
|
||||||
(1 row)
|
|
||||||
|
|
||||||
|
|||||||
2
log.c
2
log.c
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.c - Logging methods
|
* log.c - Logging methods
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
2
log.h
2
log.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* log.h
|
* log.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
5
repmgr--5.0--5.1.sql
Normal file
5
repmgr--5.0--5.1.sql
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
DROP FUNCTION am_bdr_failover_handler(INT);
|
||||||
|
DROP FUNCTION unset_bdr_failover_handler();
|
||||||
@@ -153,16 +153,6 @@ CREATE FUNCTION reset_voting_status()
|
|||||||
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||||
LANGUAGE C STRICT;
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
CREATE FUNCTION am_bdr_failover_handler(INT)
|
|
||||||
RETURNS BOOL
|
|
||||||
AS 'MODULE_PATHNAME', 'am_bdr_failover_handler'
|
|
||||||
LANGUAGE C STRICT;
|
|
||||||
|
|
||||||
CREATE FUNCTION unset_bdr_failover_handler()
|
|
||||||
RETURNS VOID
|
|
||||||
AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler'
|
|
||||||
LANGUAGE C STRICT;
|
|
||||||
|
|
||||||
CREATE FUNCTION get_repmgrd_pid()
|
CREATE FUNCTION get_repmgrd_pid()
|
||||||
RETURNS INT
|
RETURNS INT
|
||||||
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||||
|
|||||||
214
repmgr--5.1.sql
Normal file
214
repmgr--5.1.sql
Normal file
@@ -0,0 +1,214 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
CREATE TABLE repmgr.nodes (
|
||||||
|
node_id INTEGER PRIMARY KEY,
|
||||||
|
upstream_node_id INTEGER NULL REFERENCES nodes (node_id) DEFERRABLE,
|
||||||
|
active BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
node_name TEXT NOT NULL,
|
||||||
|
type TEXT NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
|
||||||
|
location TEXT NOT NULL DEFAULT 'default',
|
||||||
|
priority INT NOT NULL DEFAULT 100,
|
||||||
|
conninfo TEXT NOT NULL,
|
||||||
|
repluser VARCHAR(63) NOT NULL,
|
||||||
|
slot_name TEXT NULL,
|
||||||
|
config_file TEXT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE repmgr.events (
|
||||||
|
node_id INTEGER NOT NULL,
|
||||||
|
event TEXT NOT NULL,
|
||||||
|
successful BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
event_timestamp TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
details TEXT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
DO $repmgr$
|
||||||
|
DECLARE
|
||||||
|
DECLARE server_version_num INT;
|
||||||
|
BEGIN
|
||||||
|
SELECT setting
|
||||||
|
FROM pg_catalog.pg_settings
|
||||||
|
WHERE name = 'server_version_num'
|
||||||
|
INTO server_version_num;
|
||||||
|
IF server_version_num >= 90400 THEN
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location PG_LSN NOT NULL,
|
||||||
|
last_wal_standby_location PG_LSN,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
ELSE
|
||||||
|
EXECUTE $repmgr_func$
|
||||||
|
CREATE TABLE repmgr.monitoring_history (
|
||||||
|
primary_node_id INTEGER NOT NULL,
|
||||||
|
standby_node_id INTEGER NOT NULL,
|
||||||
|
last_monitor_time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
|
last_apply_time TIMESTAMP WITH TIME ZONE,
|
||||||
|
last_wal_primary_location TEXT NOT NULL,
|
||||||
|
last_wal_standby_location TEXT,
|
||||||
|
replication_lag BIGINT NOT NULL,
|
||||||
|
apply_lag BIGINT NOT NULL
|
||||||
|
)
|
||||||
|
$repmgr_func$;
|
||||||
|
END IF;
|
||||||
|
END$repmgr$;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE INDEX idx_monitoring_history_time
|
||||||
|
ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.show_nodes AS
|
||||||
|
SELECT n.node_id,
|
||||||
|
n.node_name,
|
||||||
|
n.active,
|
||||||
|
n.upstream_node_id,
|
||||||
|
un.node_name AS upstream_node_name,
|
||||||
|
n.type,
|
||||||
|
n.priority,
|
||||||
|
n.conninfo
|
||||||
|
FROM repmgr.nodes n
|
||||||
|
LEFT JOIN repmgr.nodes un
|
||||||
|
ON un.node_id = n.upstream_node_id;
|
||||||
|
|
||||||
|
CREATE TABLE repmgr.voting_term (
|
||||||
|
term INT NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE UNIQUE INDEX voting_term_restrict
|
||||||
|
ON repmgr.voting_term ((TRUE));
|
||||||
|
|
||||||
|
CREATE RULE voting_term_delete AS
|
||||||
|
ON DELETE TO repmgr.voting_term
|
||||||
|
DO INSTEAD NOTHING;
|
||||||
|
|
||||||
|
|
||||||
|
/* ================= */
|
||||||
|
/* repmgrd functions */
|
||||||
|
/* ================= */
|
||||||
|
|
||||||
|
/* monitoring functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION set_local_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_local_node_id()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_local_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_set_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_set_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION standby_get_last_updated()
|
||||||
|
RETURNS TIMESTAMP WITH TIME ZONE
|
||||||
|
AS 'MODULE_PATHNAME', 'standby_get_last_updated'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_last_seen(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_last_seen()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_upstream_node_id()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_upstream_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_upstream_node_id(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_upstream_node_id'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
/* failover functions */
|
||||||
|
|
||||||
|
CREATE FUNCTION notify_follow_primary(INT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'notify_follow_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_new_primary()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_new_primary'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION reset_voting_status()
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'reset_voting_status'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_repmgrd_pidfile()
|
||||||
|
RETURNS TEXT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION set_repmgrd_pid(INT, TEXT)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
|
||||||
|
LANGUAGE C CALLED ON NULL INPUT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_running()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_running'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_pause(BOOL)
|
||||||
|
RETURNS VOID
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_pause'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION repmgrd_is_paused()
|
||||||
|
RETURNS BOOL
|
||||||
|
AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
CREATE FUNCTION get_wal_receiver_pid()
|
||||||
|
RETURNS INT
|
||||||
|
AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
|
||||||
|
LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* views */
|
||||||
|
|
||||||
|
CREATE VIEW repmgr.replication_status AS
|
||||||
|
SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name,
|
||||||
|
n.type AS node_type, n.active, last_monitor_time,
|
||||||
|
CASE WHEN n.type='standby' THEN m.last_wal_primary_location ELSE NULL END AS last_wal_primary_location,
|
||||||
|
m.last_wal_standby_location,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.replication_lag) ELSE NULL END AS replication_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN
|
||||||
|
CASE WHEN replication_lag > 0 THEN age(now(), m.last_apply_time) ELSE '0'::INTERVAL END
|
||||||
|
ELSE NULL
|
||||||
|
END AS replication_time_lag,
|
||||||
|
CASE WHEN n.type='standby' THEN pg_catalog.pg_size_pretty(m.apply_lag) ELSE NULL END AS apply_lag,
|
||||||
|
AGE(NOW(), CASE WHEN pg_catalog.pg_is_in_recovery() THEN repmgr.standby_get_last_updated() ELSE m.last_monitor_time END) AS communication_time_lag
|
||||||
|
FROM repmgr.monitoring_history m
|
||||||
|
JOIN repmgr.nodes n ON m.standby_node_id = n.node_id
|
||||||
|
WHERE (m.standby_node_id, m.last_monitor_time) IN (
|
||||||
|
SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
|
||||||
|
FROM repmgr.monitoring_history m1 GROUP BY 1
|
||||||
|
);
|
||||||
|
|
||||||
265
repmgr--unpackaged--5.1.sql
Normal file
265
repmgr--unpackaged--5.1.sql
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||||
|
\echo Use "CREATE EXTENSION repmgr" to load this file. \quit
|
||||||
|
|
||||||
|
-- extract the current schema name
|
||||||
|
-- NOTE: this assumes there will be only one schema matching 'repmgr_%';
|
||||||
|
-- user is responsible for ensuring this is the case
|
||||||
|
|
||||||
|
CREATE TEMPORARY TABLE repmgr_old_schema (schema_name TEXT);
|
||||||
|
INSERT INTO repmgr_old_schema (schema_name)
|
||||||
|
SELECT nspname AS schema_name
|
||||||
|
FROM pg_catalog.pg_namespace
|
||||||
|
WHERE nspname LIKE 'repmgr_%'
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
-- move old objects into new schema
|
||||||
|
DO $repmgr$
|
||||||
|
DECLARE
|
||||||
|
old_schema TEXT;
|
||||||
|
BEGIN
|
||||||
|
SELECT schema_name FROM repmgr_old_schema
|
||||||
|
INTO old_schema;
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_nodes SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_events SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('ALTER TABLE %I.repl_monitor SET SCHEMA repmgr', old_schema);
|
||||||
|
EXECUTE format('DROP VIEW IF EXISTS %I.repl_show_nodes', old_schema);
|
||||||
|
EXECUTE format('DROP VIEW IF EXISTS %I.repl_status', old_schema);
|
||||||
|
END$repmgr$;
|
||||||
|
|
||||||
|
-- Build "repmgr.nodes" from the old "repmgr_$cluster.repl_nodes" table
-- (already moved into the "repmgr" schema at this point).
CREATE TABLE repmgr.nodes (
  node_id          INTEGER     PRIMARY KEY,
  upstream_node_id INTEGER     REFERENCES repmgr.nodes (node_id) DEFERRABLE,
  active           BOOLEAN     NOT NULL DEFAULT TRUE,
  node_name        TEXT        NOT NULL,
  type             TEXT        NOT NULL CHECK (type IN('primary','standby','witness','bdr')),
  location         TEXT        NOT NULL DEFAULT 'default',
  priority         INTEGER     NOT NULL DEFAULT 100,
  conninfo         TEXT        NOT NULL,
  repluser         VARCHAR(63) NOT NULL,
  slot_name        TEXT,
  config_file      TEXT        NOT NULL
);

-- Copy the existing node records across. The old node type "master" is
-- renamed to "primary"; columns with no counterpart in the old table are
-- filled with 'default' (location) or 'unknown' (repluser, config_file).
INSERT INTO repmgr.nodes
  (node_id, upstream_node_id, active, node_name, type, location, priority, conninfo, repluser, slot_name, config_file)
SELECT id,
       upstream_node_id,
       active,
       name,
       CASE type WHEN 'master' THEN 'primary' ELSE type END,
       'default',
       priority,
       conninfo,
       'unknown',
       slot_name,
       'unknown'
  FROM repmgr.repl_nodes
 ORDER BY id;
|
||||||
|
|
||||||
|
|
||||||
|
-- convert "repmgr_$cluster.repl_events" to "repmgr.events"
ALTER TABLE repmgr.repl_events
  RENAME TO events;

-- create new table "repmgr.voting_term"
CREATE TABLE repmgr.voting_term (
  term INTEGER NOT NULL
);

-- a unique index on a constant expression limits the table to one row
CREATE UNIQUE INDEX voting_term_restrict
    ON repmgr.voting_term ((TRUE));

-- DELETEs on the table are turned into no-ops
CREATE RULE voting_term_delete AS
    ON DELETE TO repmgr.voting_term
    DO INSTEAD NOTHING;

-- seed the single row with the initial term
INSERT INTO repmgr.voting_term (term)
     VALUES (1);
|
||||||
|
|
||||||
|
|
||||||
|
-- convert "repmgr_$cluster.repl_monitor" to "repmgr.monitoring_history"
--
-- The two WAL location columns are PG_LSN on servers reporting
-- server_version_num >= 90400 and TEXT otherwise. Everything else is
-- identical, so the table definition and the data copy are each written
-- once, with the column type substituted via format().

DO $repmgr$
DECLARE
  server_version_num INT;
  lsn_type TEXT;
BEGIN
  SELECT setting
    FROM pg_catalog.pg_settings
   WHERE name = 'server_version_num'
    INTO server_version_num;

  IF server_version_num >= 90400 THEN
    lsn_type := 'PG_LSN';
  ELSE
    lsn_type := 'TEXT';
  END IF;

  EXECUTE format($repmgr_func$
    CREATE TABLE repmgr.monitoring_history (
      primary_node_id            INTEGER NOT NULL,
      standby_node_id            INTEGER NOT NULL,
      last_monitor_time          TIMESTAMP WITH TIME ZONE NOT NULL,
      last_apply_time            TIMESTAMP WITH TIME ZONE,
      last_wal_primary_location  %s NOT NULL,
      last_wal_standby_location  %s,
      replication_lag            BIGINT NOT NULL,
      apply_lag                  BIGINT NOT NULL
    )
  $repmgr_func$, lsn_type, lsn_type);

  -- the cast is a no-op when the target column type is TEXT
  EXECUTE format($repmgr_func$
    INSERT INTO repmgr.monitoring_history
      (primary_node_id, standby_node_id, last_monitor_time, last_apply_time, last_wal_primary_location, last_wal_standby_location, replication_lag, apply_lag)
    SELECT primary_node, standby_node, last_monitor_time, last_apply_time, last_wal_primary_location::%s, last_wal_standby_location::%s, replication_lag, apply_lag
      FROM repmgr.repl_monitor
  $repmgr_func$, lsn_type, lsn_type);
END$repmgr$;

CREATE INDEX idx_monitoring_history_time
          ON repmgr.monitoring_history (last_monitor_time, standby_node_id);
|
||||||
|
|
||||||
|
-- One row per registered node, with the name of its upstream node (NULL
-- when the node has no upstream record) resolved through a self-join.
CREATE VIEW repmgr.show_nodes AS
  SELECT node.node_id,
         node.node_name,
         node.active,
         node.upstream_node_id,
         upstream.node_name AS upstream_node_name,
         node.type,
         node.priority,
         node.conninfo
    FROM repmgr.nodes AS node
         LEFT JOIN repmgr.nodes AS upstream
                ON upstream.node_id = node.upstream_node_id;
|
||||||
|
|
||||||
|
|
||||||
|
/* ================= */
/* repmgrd functions */
/* ================= */

-- All functions below are implemented in C in the extension's shared
-- library (MODULE_PATHNAME); each SQL name maps to the C symbol of the
-- same name.

/* monitoring functions */

CREATE FUNCTION set_local_node_id(INTEGER) RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_local_node_id'
  LANGUAGE C STRICT;

CREATE FUNCTION get_local_node_id() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_local_node_id'
  LANGUAGE C STRICT;

CREATE FUNCTION standby_set_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
  AS 'MODULE_PATHNAME', 'standby_set_last_updated'
  LANGUAGE C STRICT;

CREATE FUNCTION standby_get_last_updated() RETURNS TIMESTAMP WITH TIME ZONE
  AS 'MODULE_PATHNAME', 'standby_get_last_updated'
  LANGUAGE C STRICT;

CREATE FUNCTION set_upstream_last_seen(INTEGER) RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_upstream_last_seen'
  LANGUAGE C STRICT;

CREATE FUNCTION get_upstream_last_seen() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_upstream_last_seen'
  LANGUAGE C STRICT;

CREATE FUNCTION get_upstream_node_id() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_upstream_node_id'
  LANGUAGE C STRICT;

CREATE FUNCTION set_upstream_node_id(INTEGER) RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_upstream_node_id'
  LANGUAGE C STRICT;

/* failover functions */

CREATE FUNCTION notify_follow_primary(INTEGER) RETURNS VOID
  AS 'MODULE_PATHNAME', 'notify_follow_primary'
  LANGUAGE C STRICT;

CREATE FUNCTION get_new_primary() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_new_primary'
  LANGUAGE C STRICT;

CREATE FUNCTION reset_voting_status() RETURNS VOID
  AS 'MODULE_PATHNAME', 'reset_voting_status'
  LANGUAGE C STRICT;

CREATE FUNCTION get_repmgrd_pid() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_repmgrd_pid'
  LANGUAGE C STRICT;

CREATE FUNCTION get_repmgrd_pidfile() RETURNS TEXT
  AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile'
  LANGUAGE C STRICT;

-- the only function here which is not STRICT: it is still invoked when
-- an argument is NULL
CREATE FUNCTION set_repmgrd_pid(INTEGER, TEXT) RETURNS VOID
  AS 'MODULE_PATHNAME', 'set_repmgrd_pid'
  LANGUAGE C CALLED ON NULL INPUT;

CREATE FUNCTION repmgrd_is_running() RETURNS BOOLEAN
  AS 'MODULE_PATHNAME', 'repmgrd_is_running'
  LANGUAGE C STRICT;

CREATE FUNCTION repmgrd_pause(BOOLEAN) RETURNS VOID
  AS 'MODULE_PATHNAME', 'repmgrd_pause'
  LANGUAGE C STRICT;

CREATE FUNCTION repmgrd_is_paused() RETURNS BOOLEAN
  AS 'MODULE_PATHNAME', 'repmgrd_is_paused'
  LANGUAGE C STRICT;

CREATE FUNCTION get_wal_receiver_pid() RETURNS INTEGER
  AS 'MODULE_PATHNAME', 'get_wal_receiver_pid'
  LANGUAGE C STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
/* views */

-- Latest monitoring record for each standby_node_id, joined to its node
-- record. Primary WAL location and the lag columns are only reported for
-- nodes of type 'standby' and are NULL for any other node type.
CREATE VIEW repmgr.replication_status AS
  SELECT m.primary_node_id,
         m.standby_node_id,
         n.node_name AS standby_name,
         n.type AS node_type,
         n.active,
         m.last_monitor_time,
         CASE WHEN n.type = 'standby'
              THEN m.last_wal_primary_location
         END AS last_wal_primary_location,
         m.last_wal_standby_location,
         CASE WHEN n.type = 'standby'
              THEN pg_catalog.pg_size_pretty(m.replication_lag)
         END AS replication_lag,
         CASE WHEN n.type = 'standby'
              THEN CASE WHEN m.replication_lag > 0
                        THEN age(now(), m.last_apply_time)
                        ELSE '0'::INTERVAL
                   END
         END AS replication_time_lag,
         CASE WHEN n.type = 'standby'
              THEN pg_catalog.pg_size_pretty(m.apply_lag)
         END AS apply_lag,
         -- measured from repmgr.standby_get_last_updated() when the server
         -- is in recovery, otherwise from the monitoring record's timestamp
         age(now(),
             CASE WHEN pg_catalog.pg_is_in_recovery()
                  THEN repmgr.standby_get_last_updated()
                  ELSE m.last_monitor_time
             END) AS communication_time_lag
    FROM repmgr.monitoring_history m
         JOIN repmgr.nodes n
           ON m.standby_node_id = n.node_id
   WHERE (m.standby_node_id, m.last_monitor_time) IN (
           SELECT m1.standby_node_id, MAX(m1.last_monitor_time)
             FROM repmgr.monitoring_history m1
            GROUP BY m1.standby_node_id
         );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * The contents of the old tables have been copied into "repmgr.nodes" and
 * "repmgr.monitoring_history" above, so the originals can now be dropped.
 */
DROP TABLE repmgr.repl_nodes;
DROP TABLE repmgr.repl_monitor;

/* the temporary table holding the old schema name is no longer needed */
DROP TABLE repmgr_old_schema;
|
||||||
@@ -1,557 +0,0 @@
|
|||||||
/*
|
|
||||||
* repmgr-action-bdr.c
|
|
||||||
*
|
|
||||||
* Implements BDR-related actions for the repmgr command line utility
|
|
||||||
*
|
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "repmgr.h"
|
|
||||||
|
|
||||||
#include "repmgr-client-global.h"
|
|
||||||
#include "repmgr-action-bdr.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* do_bdr_register()
|
|
||||||
*
|
|
||||||
* As each BDR node is its own primary, registering a BDR node
|
|
||||||
* will create the repmgr metadata schema if necessary.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
do_bdr_register(void)
|
|
||||||
{
|
|
||||||
PGconn *conn = NULL;
|
|
||||||
BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
|
|
||||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
|
||||||
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
bool success = true;
|
|
||||||
char *dbname = NULL;
|
|
||||||
|
|
||||||
/* sanity-check configuration for BDR-compatability */
|
|
||||||
if (config_file_options.replication_type != REPLICATION_TYPE_BDR)
|
|
||||||
{
|
|
||||||
log_error(_("cannot run BDR REGISTER on a non-BDR node"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
dbname = pg_malloc0(MAXLEN);
|
|
||||||
|
|
||||||
if (dbname == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to allocate memory; terminating."));
|
|
||||||
exit(ERR_OUT_OF_MEMORY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* store the database name for future reference */
|
|
||||||
get_conninfo_value(config_file_options.conninfo, "dbname", dbname);
|
|
||||||
|
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
|
|
||||||
if (!is_bdr_db(conn, NULL))
|
|
||||||
{
|
|
||||||
log_error(_("database \"%s\" is not BDR-enabled"), dbname);
|
|
||||||
log_hint(_("when using repmgr with BDR, the repmgr schema must be stored in the BDR database"));
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check that there are at most 2 BDR nodes */
|
|
||||||
get_all_bdr_node_records(conn, &bdr_nodes);
|
|
||||||
|
|
||||||
if (bdr_nodes.node_count == 0)
|
|
||||||
{
|
|
||||||
log_error(_("database \"%s\" is BDR-enabled but no BDR nodes were found"), dbname);
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* BDR 2 implementation is for 2 nodes only */
|
|
||||||
if (get_bdr_version_num() < 3 && bdr_nodes.node_count > 2)
|
|
||||||
{
|
|
||||||
log_error(_("repmgr can only support BDR 2.x clusters with 2 nodes"));
|
|
||||||
log_detail(_("this BDR cluster has %i nodes"), bdr_nodes.node_count);
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (get_bdr_version_num() > 2)
|
|
||||||
{
|
|
||||||
log_error(_("\"repmgr bdr register\" is for BDR 2.x only"));
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* check for a matching BDR node */
|
|
||||||
{
|
|
||||||
PQExpBufferData bdr_local_node_name;
|
|
||||||
bool node_match = false;
|
|
||||||
|
|
||||||
initPQExpBuffer(&bdr_local_node_name);
|
|
||||||
node_match = bdr_node_name_matches(conn, config_file_options.node_name, &bdr_local_node_name);
|
|
||||||
|
|
||||||
if (node_match == false)
|
|
||||||
{
|
|
||||||
if (strlen(bdr_local_node_name.data))
|
|
||||||
{
|
|
||||||
log_error(_("local node BDR node name is \"%s\", expected: \"%s\""),
|
|
||||||
bdr_local_node_name.data,
|
|
||||||
config_file_options.node_name);
|
|
||||||
log_hint(_("\"node_name\" in repmgr.conf must match \"node_name\" in bdr.bdr_nodes"));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_error(_("local node does not report BDR node name"));
|
|
||||||
log_hint(_("ensure this is an active BDR node"));
|
|
||||||
}
|
|
||||||
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
termPQExpBuffer(&bdr_local_node_name);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
termPQExpBuffer(&bdr_local_node_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check whether repmgr extension exists, and there are no non-BDR nodes registered */
|
|
||||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
|
||||||
|
|
||||||
if (extension_status == REPMGR_UNKNOWN)
|
|
||||||
{
|
|
||||||
log_error(_("unable to determine status of \"repmgr\" extension in database \"%s\""),
|
|
||||||
dbname);
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (extension_status == REPMGR_UNAVAILABLE)
|
|
||||||
{
|
|
||||||
log_error(_("\"repmgr\" extension is not available"));
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (extension_status == REPMGR_INSTALLED)
|
|
||||||
{
|
|
||||||
if (!is_bdr_repmgr(conn))
|
|
||||||
{
|
|
||||||
log_error(_("repmgr metadatabase contains records for non-BDR nodes"));
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_debug("creating repmgr extension in database \"%s\"", dbname);
|
|
||||||
|
|
||||||
begin_transaction(conn);
|
|
||||||
|
|
||||||
if (!create_repmgr_extension(conn))
|
|
||||||
{
|
|
||||||
log_error(_("unable to create repmgr extension - see preceding error message(s); aborting"));
|
|
||||||
rollback_transaction(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
commit_transaction(conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
pfree(dbname);
|
|
||||||
|
|
||||||
if (bdr_node_has_repmgr_set(conn, config_file_options.node_name) == false)
|
|
||||||
{
|
|
||||||
log_debug("bdr_node_has_repmgr_set() = false");
|
|
||||||
bdr_node_set_repmgr_set(conn, config_file_options.node_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* before adding the extension tables to the replication set, if any other
|
|
||||||
* BDR nodes exist, populate repmgr.nodes with a copy of existing entries
|
|
||||||
*
|
|
||||||
* currently we won't copy the contents of any other tables
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
{
|
|
||||||
NodeInfoList local_node_records = T_NODE_INFO_LIST_INITIALIZER;
|
|
||||||
|
|
||||||
(void) get_all_node_records(conn, &local_node_records);
|
|
||||||
|
|
||||||
if (local_node_records.node_count == 0)
|
|
||||||
{
|
|
||||||
BdrNodeInfoList bdr_nodes = T_BDR_NODE_INFO_LIST_INITIALIZER;
|
|
||||||
BdrNodeInfoListCell *bdr_cell = NULL;
|
|
||||||
|
|
||||||
get_all_bdr_node_records(conn, &bdr_nodes);
|
|
||||||
|
|
||||||
if (bdr_nodes.node_count == 0)
|
|
||||||
{
|
|
||||||
log_error(_("unable to retrieve any BDR node records"));
|
|
||||||
log_detail("%s", PQerrorMessage(conn));
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (bdr_cell = bdr_nodes.head; bdr_cell; bdr_cell = bdr_cell->next)
|
|
||||||
{
|
|
||||||
PGconn *bdr_node_conn = NULL;
|
|
||||||
NodeInfoList existing_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
|
||||||
NodeInfoListCell *cell = NULL;
|
|
||||||
ExtensionStatus other_node_extension_status = REPMGR_UNKNOWN;
|
|
||||||
|
|
||||||
/* skip the local node */
|
|
||||||
if (strncmp(node_info.node_name, bdr_cell->node_info->node_name, sizeof(node_info.node_name)) == 0)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_debug("connecting to BDR node \"%s\" (conninfo: \"%s\")",
|
|
||||||
bdr_cell->node_info->node_name,
|
|
||||||
bdr_cell->node_info->node_local_dsn);
|
|
||||||
bdr_node_conn = establish_db_connection_quiet(bdr_cell->node_info->node_local_dsn);
|
|
||||||
|
|
||||||
if (PQstatus(bdr_node_conn) != CONNECTION_OK)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check repmgr schema exists, skip if not */
|
|
||||||
other_node_extension_status = get_repmgr_extension_status(bdr_node_conn, NULL);
|
|
||||||
|
|
||||||
if (other_node_extension_status != REPMGR_INSTALLED)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
(void) get_all_node_records(bdr_node_conn, &existing_nodes);
|
|
||||||
|
|
||||||
for (cell = existing_nodes.head; cell; cell = cell->next)
|
|
||||||
{
|
|
||||||
log_debug("creating record for node \"%s\" (ID: %i)",
|
|
||||||
cell->node_info->node_name, cell->node_info->node_id);
|
|
||||||
create_node_record(conn, "bdr register", cell->node_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
PQfinish(bdr_node_conn);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add the repmgr extension tables to a replication set */
|
|
||||||
|
|
||||||
if (get_bdr_version_num() < 3)
|
|
||||||
{
|
|
||||||
add_extension_tables_to_bdr_replication_set(conn);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* this is the only table we need to replicate */
|
|
||||||
char *replication_set = get_default_bdr_replication_set(conn);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this probably won't happen, but we need to be sure we're using
|
|
||||||
* the replication set metadata correctly...
|
|
||||||
*/
|
|
||||||
if (conn == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to retrieve default BDR replication set"));
|
|
||||||
log_hint(_("see preceding messages"));
|
|
||||||
log_debug("check query in get_default_bdr_replication_set()");
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_table_in_bdr_replication_set(conn, "nodes", replication_set) == false)
|
|
||||||
{
|
|
||||||
add_table_to_bdr_replication_set(conn, "nodes", replication_set);
|
|
||||||
}
|
|
||||||
|
|
||||||
pfree(replication_set);
|
|
||||||
}
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
begin_transaction(conn);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* we'll check if a record exists (even if the schema was just created),
|
|
||||||
* as there's a faint chance of a race condition
|
|
||||||
*/
|
|
||||||
|
|
||||||
record_status = get_node_record(conn, config_file_options.node_id, &node_info);
|
|
||||||
|
|
||||||
/* Update internal node record */
|
|
||||||
|
|
||||||
node_info.type = BDR;
|
|
||||||
node_info.node_id = config_file_options.node_id;
|
|
||||||
node_info.upstream_node_id = NO_UPSTREAM_NODE;
|
|
||||||
node_info.active = true;
|
|
||||||
node_info.priority = config_file_options.priority;
|
|
||||||
|
|
||||||
strncpy(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name));
|
|
||||||
strncpy(node_info.location, config_file_options.location, sizeof(node_info.location));
|
|
||||||
strncpy(node_info.conninfo, config_file_options.conninfo, sizeof(node_info.conninfo));
|
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
|
||||||
{
|
|
||||||
bool node_updated = false;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* At this point we will have established there are no non-BDR
|
|
||||||
* records, so no need to verify the node type
|
|
||||||
*/
|
|
||||||
if (!runtime_options.force)
|
|
||||||
{
|
|
||||||
log_error(_("this node is already registered"));
|
|
||||||
log_hint(_("use -F/--force to overwrite the existing node record"));
|
|
||||||
rollback_transaction(conn);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* don't permit changing the node name - this must match the BDR node
|
|
||||||
* name set when the node was registered.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (strncmp(node_info.node_name, config_file_options.node_name, sizeof(node_info.node_name)) != 0)
|
|
||||||
{
|
|
||||||
log_error(_("a record for node %i is already registered with node_name \"%s\""),
|
|
||||||
config_file_options.node_id, node_info.node_name);
|
|
||||||
log_hint(_("node_name configured in repmgr.conf is \"%s\""), config_file_options.node_name);
|
|
||||||
|
|
||||||
rollback_transaction(conn);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
node_updated = update_node_record(conn, "bdr register", &node_info);
|
|
||||||
|
|
||||||
if (node_updated == true)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details, _("node record updated for node \"%s\" (%i)"),
|
|
||||||
config_file_options.node_name, config_file_options.node_id);
|
|
||||||
log_verbose(LOG_NOTICE, "%s", event_details.data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
success = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* create new node record */
|
|
||||||
bool node_created = create_node_record(conn, "bdr register", &node_info);
|
|
||||||
|
|
||||||
if (node_created == true)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("node record created for node \"%s\" (ID: %i)"),
|
|
||||||
config_file_options.node_name, config_file_options.node_id);
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
success = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (success == false)
|
|
||||||
{
|
|
||||||
rollback_transaction(conn);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_DB_QUERY);
|
|
||||||
}
|
|
||||||
|
|
||||||
commit_transaction(conn);
|
|
||||||
/* Log the event */
|
|
||||||
create_event_notification(
|
|
||||||
conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"bdr_register",
|
|
||||||
true,
|
|
||||||
event_details.data);
|
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
PQfinish(conn);
|
|
||||||
|
|
||||||
log_notice(_("BDR node %i registered (conninfo: %s)"),
|
|
||||||
config_file_options.node_id, config_file_options.conninfo);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
do_bdr_unregister(void)
|
|
||||||
{
|
|
||||||
PGconn *conn = NULL;
|
|
||||||
ExtensionStatus extension_status = REPMGR_UNKNOWN;
|
|
||||||
int target_node_id = UNKNOWN_NODE_ID;
|
|
||||||
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
|
||||||
bool node_record_deleted = false;
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
char *dbname;
|
|
||||||
|
|
||||||
/* sanity-check configuration for BDR-compatability */
|
|
||||||
|
|
||||||
if (config_file_options.replication_type != REPLICATION_TYPE_BDR)
|
|
||||||
{
|
|
||||||
log_error(_("cannot run BDR UNREGISTER on a non-BDR node"));
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
dbname = pg_malloc0(MAXLEN);
|
|
||||||
|
|
||||||
if (dbname == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("unable to allocate memory; terminating."));
|
|
||||||
exit(ERR_OUT_OF_MEMORY);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* store the database name for future reference */
|
|
||||||
get_conninfo_value(config_file_options.conninfo, "dbname", dbname);
|
|
||||||
|
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
|
|
||||||
if (!is_bdr_db(conn, NULL))
|
|
||||||
{
|
|
||||||
log_error(_("database \"%s\" is not BDR-enabled"), dbname);
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
extension_status = get_repmgr_extension_status(conn, NULL);
|
|
||||||
if (extension_status != REPMGR_INSTALLED)
|
|
||||||
{
|
|
||||||
log_error(_("repmgr is not installed on database \"%s\""), dbname);
|
|
||||||
PQfinish(conn);
|
|
||||||
pfree(dbname);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
pfree(dbname);
|
|
||||||
|
|
||||||
if (!is_bdr_repmgr(conn))
|
|
||||||
{
|
|
||||||
log_error(_("repmgr metadatabase contains records for non-BDR nodes"));
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
if (runtime_options.node_id != UNKNOWN_NODE_ID)
|
|
||||||
target_node_id = runtime_options.node_id;
|
|
||||||
else
|
|
||||||
target_node_id = config_file_options.node_id;
|
|
||||||
|
|
||||||
|
|
||||||
/* Check node exists and is really a BDR node */
|
|
||||||
record_status = get_node_record(conn, target_node_id, &node_info);
|
|
||||||
|
|
||||||
if (record_status != RECORD_FOUND)
|
|
||||||
{
|
|
||||||
log_error(_("no record found for node %i"), target_node_id);
|
|
||||||
PQfinish(conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
begin_transaction(conn);
|
|
||||||
|
|
||||||
log_debug("unregistering node %i", target_node_id);
|
|
||||||
|
|
||||||
node_record_deleted = delete_node_record(conn, target_node_id);
|
|
||||||
|
|
||||||
if (node_record_deleted == false)
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
"unable to delete node record for node \"%s\" (ID: %i)",
|
|
||||||
node_info.node_name,
|
|
||||||
target_node_id);
|
|
||||||
rollback_transaction(conn);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
"node record deleted for node \"%s\" (ID: %i)",
|
|
||||||
node_info.node_name,
|
|
||||||
target_node_id);
|
|
||||||
commit_transaction(conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* Log the event */
|
|
||||||
create_event_notification(
|
|
||||||
conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"bdr_unregister",
|
|
||||||
true,
|
|
||||||
event_details.data);
|
|
||||||
|
|
||||||
PQfinish(conn);
|
|
||||||
|
|
||||||
log_notice(_("bdr node \"%s\" (ID: %i) successfully unregistered"),
|
|
||||||
node_info.node_name, target_node_id);
|
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * do_bdr_help()
 *
 * Print usage information for the "bdr register" and "bdr unregister"
 * actions to standard output.
 */
void
do_bdr_help(void)
{
	print_help_header();

	/* synopsis */
	printf(_("Usage:\n"));
	printf(_(" %s [OPTIONS] bdr register\n"), progname());
	printf(_(" %s [OPTIONS] bdr unregister\n"), progname());
	putchar('\n');

	/* "bdr register" and its options */
	printf(_("BDR REGISTER\n"));
	putchar('\n');
	printf(_(" \"bdr register\" initialises the repmgr cluster and registers the initial bdr node.\n"));
	putchar('\n');
	printf(_(" -F, --force overwrite an existing node record\n"));
	putchar('\n');

	/* "bdr unregister" and its options */
	printf(_("BDR UNREGISTER\n"));
	putchar('\n');
	printf(_(" \"bdr unregister\" unregisters an inactive BDR node.\n"));
	putchar('\n');
	printf(_(" --node-id ID node to unregister (optional, used when the node to unregister\n"
			 " is offline)\n"));
	putchar('\n');
}
|
|
||||||
@@ -1,28 +0,0 @@
|
|||||||
/*
|
|
||||||
* repmgr-action-bdr.h
|
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _REPMGR_ACTION_BDR_H_
#define _REPMGR_ACTION_BDR_H_

/* entry points for the "bdr register" / "bdr unregister" actions */
void		do_bdr_register(void);
void		do_bdr_unregister(void);

/* prints usage information for the "bdr" actions */
void		do_bdr_help(void);

#endif							/* _REPMGR_ACTION_BDR_H_ */
|
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements cluster information actions for the repmgr command line utility
|
* Implements cluster information actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -55,10 +55,8 @@ typedef enum
|
|||||||
struct ColHeader headers_show[SHOW_HEADER_COUNT];
|
struct ColHeader headers_show[SHOW_HEADER_COUNT];
|
||||||
struct ColHeader headers_event[EVENT_HEADER_COUNT];
|
struct ColHeader headers_event[EVENT_HEADER_COUNT];
|
||||||
|
|
||||||
|
static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, ItemList *warnings, int *error_code);
|
||||||
|
static int build_cluster_crosscheck(t_node_status_cube ***cube_dest, ItemList *warnings, int *error_code);
|
||||||
static int build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code);
|
|
||||||
static int build_cluster_crosscheck(t_node_status_cube ***cube_dest, int *name_length, ItemList *warnings, int *error_code);
|
|
||||||
static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
|
static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id, int matrix_node_id, int connection_node_id, int connection_status);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -67,6 +65,8 @@ static void cube_set_node_status(t_node_status_cube **cube, int n, int node_id,
|
|||||||
* Parameters:
|
* Parameters:
|
||||||
* --compact
|
* --compact
|
||||||
* --csv
|
* --csv
|
||||||
|
* --terse
|
||||||
|
* --verbose
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
do_cluster_show(void)
|
do_cluster_show(void)
|
||||||
@@ -206,7 +206,8 @@ do_cluster_show(void)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* NOP on pre-9.6 servers */
|
/* NOP on pre-9.6 servers */
|
||||||
cell->node_info->replication_info->timeline_id = get_node_timeline(cell->node_info->conn);
|
cell->node_info->replication_info->timeline_id = get_node_timeline(cell->node_info->conn,
|
||||||
|
cell->node_info->replication_info->timeline_id_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&node_status);
|
initPQExpBuffer(&node_status);
|
||||||
@@ -244,18 +245,13 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
headers_show[SHOW_LOCATION].cur_length = strlen(cell->node_info->location);
|
||||||
|
|
||||||
if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID)
|
/* Format timeline ID */
|
||||||
|
if (cell->node_info->type == WITNESS)
|
||||||
{
|
{
|
||||||
/* display "?" */
|
/* The witness node's timeline ID is irrelevant */
|
||||||
headers_show[SHOW_PRIORITY].cur_length = 1;
|
strncpy(cell->node_info->replication_info->timeline_id_str, _("n/a"), MAXLEN);
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
initPQExpBuffer(&buf);
|
|
||||||
appendPQExpBuffer(&buf, "%i", cell->node_info->replication_info->timeline_id);
|
|
||||||
headers_show[SHOW_PRIORITY].cur_length = strlen(buf.data);
|
|
||||||
termPQExpBuffer(&buf);
|
|
||||||
}
|
}
|
||||||
|
headers_show[SHOW_TIMELINE_ID].cur_length = strlen(cell->node_info->replication_info->timeline_id_str);
|
||||||
|
|
||||||
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
headers_show[SHOW_CONNINFO].cur_length = strlen(cell->node_info->conninfo);
|
||||||
|
|
||||||
@@ -322,10 +318,7 @@ do_cluster_show(void)
|
|||||||
|
|
||||||
if (headers_show[SHOW_TIMELINE_ID].display == true)
|
if (headers_show[SHOW_TIMELINE_ID].display == true)
|
||||||
{
|
{
|
||||||
if (cell->node_info->replication_info->timeline_id == UNKNOWN_TIMELINE_ID)
|
printf("| %-*s ", headers_show[SHOW_TIMELINE_ID].max_length, cell->node_info->replication_info->timeline_id_str);
|
||||||
printf("| %-*c ", headers_show[SHOW_TIMELINE_ID].max_length, '?');
|
|
||||||
else
|
|
||||||
printf("| %-*i ", headers_show[SHOW_TIMELINE_ID].max_length, (int)cell->node_info->replication_info->timeline_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (headers_show[SHOW_CONNINFO].display == true)
|
if (headers_show[SHOW_CONNINFO].display == true)
|
||||||
@@ -343,14 +336,25 @@ do_cluster_show(void)
|
|||||||
/* emit any warnings */
|
/* emit any warnings */
|
||||||
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
|
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV)
|
||||||
{
|
{
|
||||||
ItemListCell *cell = NULL;
|
|
||||||
|
|
||||||
printf(_("\nWARNING: following issues were detected\n"));
|
ItemListCell *cell = NULL;
|
||||||
|
PQExpBufferData warning;
|
||||||
|
|
||||||
|
initPQExpBuffer(&warning);
|
||||||
|
|
||||||
|
appendPQExpBufferStr(&warning,
|
||||||
|
_("following issues were detected\n"));
|
||||||
for (cell = warnings.head; cell; cell = cell->next)
|
for (cell = warnings.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
printf(_(" - %s\n"), cell->string);
|
appendPQExpBuffer(&warning,
|
||||||
|
_(" - %s\n"), cell->string);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
puts("");
|
||||||
|
log_warning("%s", warning.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&warning);
|
||||||
|
|
||||||
if (runtime_options.verbose == false && connection_error_found == true)
|
if (runtime_options.verbose == false && connection_error_found == true)
|
||||||
{
|
{
|
||||||
log_hint(_("execute with --verbose option to see connection error messages"));
|
log_hint(_("execute with --verbose option to see connection error messages"));
|
||||||
@@ -532,9 +536,6 @@ do_cluster_crosscheck(void)
|
|||||||
{
|
{
|
||||||
int i = 0,
|
int i = 0,
|
||||||
n = 0;
|
n = 0;
|
||||||
char c;
|
|
||||||
const char *node_header = "Name";
|
|
||||||
int name_length = strlen(node_header);
|
|
||||||
|
|
||||||
t_node_status_cube **cube;
|
t_node_status_cube **cube;
|
||||||
|
|
||||||
@@ -542,7 +543,7 @@ do_cluster_crosscheck(void)
|
|||||||
int error_code = SUCCESS;
|
int error_code = SUCCESS;
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
|
|
||||||
n = build_cluster_crosscheck(&cube, &name_length, &warnings, &error_code);
|
n = build_cluster_crosscheck(&cube, &warnings, &error_code);
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
@@ -576,24 +577,56 @@ do_cluster_crosscheck(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
printf("%*s | Id ", name_length, node_header);
|
/* output header contains node name, node ID and one column for each node in the cluster */
|
||||||
for (i = 0; i < n; i++)
|
struct ColHeader *headers_crosscheck = NULL;
|
||||||
printf("| %2d ", cube[i]->node_id);
|
|
||||||
printf("\n");
|
int header_count = n + 2;
|
||||||
|
int header_id = 2;
|
||||||
|
|
||||||
|
headers_crosscheck = palloc0(sizeof(ColHeader) * header_count);
|
||||||
|
|
||||||
|
/* Initialize column headers */
|
||||||
|
strncpy(headers_crosscheck[0].title, _("Name"), MAXLEN);
|
||||||
|
strncpy(headers_crosscheck[1].title, _("ID"), MAXLEN);
|
||||||
|
|
||||||
for (i = 0; i < name_length; i++)
|
|
||||||
printf("-");
|
|
||||||
printf("-+----");
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
printf("+----");
|
{
|
||||||
printf("\n");
|
maxlen_snprintf(headers_crosscheck[header_id].title, "%i", cube[i]->node_id);
|
||||||
|
header_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize column max values */
|
||||||
|
for (i = 0; i < header_count; i++)
|
||||||
|
{
|
||||||
|
headers_crosscheck[i].display = true;
|
||||||
|
headers_crosscheck[i].max_length = strlen(headers_crosscheck[i].title);
|
||||||
|
headers_crosscheck[i].cur_length = headers_crosscheck[i].max_length;
|
||||||
|
|
||||||
|
/* We can derive the maximum node ID length for the ID column from
|
||||||
|
* the generated matrix node ID headers
|
||||||
|
*/
|
||||||
|
if (i >= 2 && headers_crosscheck[i].max_length > headers_crosscheck[1].max_length)
|
||||||
|
headers_crosscheck[1].max_length = headers_crosscheck[i].max_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
if (strlen(cube[i]->node_name) > headers_crosscheck[0].max_length)
|
||||||
|
{
|
||||||
|
headers_crosscheck[0].max_length = strlen(cube[i]->node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_status_header(header_count, headers_crosscheck);
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
int column_node_ix;
|
int column_node_ix;
|
||||||
|
|
||||||
printf("%*s | %2d ", name_length,
|
printf(" %-*s | %-*i ",
|
||||||
|
headers_crosscheck[0].max_length,
|
||||||
cube[i]->node_name,
|
cube[i]->node_name,
|
||||||
|
headers_crosscheck[1].max_length,
|
||||||
cube[i]->node_id);
|
cube[i]->node_id);
|
||||||
|
|
||||||
for (column_node_ix = 0; column_node_ix < n; column_node_ix++)
|
for (column_node_ix = 0; column_node_ix < n; column_node_ix++)
|
||||||
@@ -601,6 +634,8 @@ do_cluster_crosscheck(void)
|
|||||||
int max_node_status = -2;
|
int max_node_status = -2;
|
||||||
int node_ix = 0;
|
int node_ix = 0;
|
||||||
|
|
||||||
|
char c;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The value of entry (i,j) is equal to the maximum value of all
|
* The value of entry (i,j) is equal to the maximum value of all
|
||||||
* the (i,j,k). Indeed:
|
* the (i,j,k). Indeed:
|
||||||
@@ -640,7 +675,7 @@ do_cluster_crosscheck(void)
|
|||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("| %c ", c);
|
printf("| %-*c ", headers_crosscheck[column_node_ix + 2].max_length, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
@@ -702,16 +737,13 @@ do_cluster_matrix()
|
|||||||
j = 0,
|
j = 0,
|
||||||
n = 0;
|
n = 0;
|
||||||
|
|
||||||
const char *node_header = "Name";
|
|
||||||
int name_length = strlen(node_header);
|
|
||||||
|
|
||||||
t_node_matrix_rec **matrix_rec_list;
|
t_node_matrix_rec **matrix_rec_list;
|
||||||
|
|
||||||
bool connection_error_found = false;
|
bool connection_error_found = false;
|
||||||
int error_code = SUCCESS;
|
int error_code = SUCCESS;
|
||||||
ItemList warnings = {NULL, NULL};
|
ItemList warnings = {NULL, NULL};
|
||||||
|
|
||||||
n = build_cluster_matrix(&matrix_rec_list, &name_length, &warnings, &error_code);
|
n = build_cluster_matrix(&matrix_rec_list, &warnings, &error_code);
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_CSV)
|
if (runtime_options.output_mode == OM_CSV)
|
||||||
{
|
{
|
||||||
@@ -734,27 +766,60 @@ do_cluster_matrix()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char c;
|
/* output header contains node name, node ID and one column for each node in the cluster */
|
||||||
|
struct ColHeader *headers_matrix = NULL;
|
||||||
|
|
||||||
printf("%*s | Id ", name_length, node_header);
|
int header_count = n + 2;
|
||||||
for (i = 0; i < n; i++)
|
int header_id = 2;
|
||||||
printf("| %2d ", matrix_rec_list[i]->node_id);
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
for (i = 0; i < name_length; i++)
|
headers_matrix = palloc0(sizeof(ColHeader) * header_count);
|
||||||
printf("-");
|
|
||||||
printf("-+----");
|
/* Initialize column headers */
|
||||||
for (i = 0; i < n; i++)
|
strncpy(headers_matrix[0].title, _("Name"), MAXLEN);
|
||||||
printf("+----");
|
strncpy(headers_matrix[1].title, _("ID"), MAXLEN);
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
{
|
{
|
||||||
printf("%*s | %2d ", name_length,
|
maxlen_snprintf(headers_matrix[header_id].title, "%i", matrix_rec_list[i]->node_id);
|
||||||
|
header_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Initialize column max values */
|
||||||
|
for (i = 0; i < header_count; i++)
|
||||||
|
{
|
||||||
|
headers_matrix[i].display = true;
|
||||||
|
headers_matrix[i].max_length = strlen(headers_matrix[i].title);
|
||||||
|
headers_matrix[i].cur_length = headers_matrix[i].max_length;
|
||||||
|
|
||||||
|
/* We can derive the maximum node ID length for the ID column from
|
||||||
|
* the generated matrix node ID headers
|
||||||
|
*/
|
||||||
|
if (i >= 2 && headers_matrix[i].max_length > headers_matrix[1].max_length)
|
||||||
|
headers_matrix[1].max_length = headers_matrix[i].max_length;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
if (strlen(matrix_rec_list[i]->node_name) > headers_matrix[0].max_length)
|
||||||
|
{
|
||||||
|
headers_matrix[0].max_length = strlen(matrix_rec_list[i]->node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_status_header(header_count, headers_matrix);
|
||||||
|
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
printf(" %-*s | %-*i ",
|
||||||
|
headers_matrix[0].max_length,
|
||||||
matrix_rec_list[i]->node_name,
|
matrix_rec_list[i]->node_name,
|
||||||
|
headers_matrix[1].max_length,
|
||||||
matrix_rec_list[i]->node_id);
|
matrix_rec_list[i]->node_id);
|
||||||
for (j = 0; j < n; j++)
|
for (j = 0; j < n; j++)
|
||||||
{
|
{
|
||||||
|
char c;
|
||||||
|
|
||||||
switch (matrix_rec_list[i]->node_status_list[j]->node_status)
|
switch (matrix_rec_list[i]->node_status_list[j]->node_status)
|
||||||
{
|
{
|
||||||
case -2:
|
case -2:
|
||||||
@@ -772,7 +837,7 @@ do_cluster_matrix()
|
|||||||
exit(ERR_INTERNAL);
|
exit(ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("| %c ", c);
|
printf("| %-*c ", headers_matrix[j + 2].max_length, c);
|
||||||
}
|
}
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
@@ -832,7 +897,7 @@ matrix_set_node_status(t_node_matrix_rec **matrix_rec_list, int n, int node_id,
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, ItemList *warnings, int *error_code)
|
build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, ItemList *warnings, int *error_code)
|
||||||
{
|
{
|
||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
int i = 0,
|
int i = 0,
|
||||||
@@ -890,7 +955,6 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
/* Initialise matrix structure for each node */
|
/* Initialise matrix structure for each node */
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
int name_length_cur;
|
|
||||||
NodeInfoListCell *cell_j;
|
NodeInfoListCell *cell_j;
|
||||||
|
|
||||||
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
matrix_rec_list[i] = (t_node_matrix_rec *) pg_malloc0(sizeof(t_node_matrix_rec));
|
||||||
@@ -900,13 +964,6 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
sizeof(matrix_rec_list[i]->node_name));
|
sizeof(matrix_rec_list[i]->node_name));
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the maximum length of a node name
|
|
||||||
*/
|
|
||||||
name_length_cur = strlen(matrix_rec_list[i]->node_name);
|
|
||||||
if (name_length_cur > *name_length)
|
|
||||||
*name_length = name_length_cur;
|
|
||||||
|
|
||||||
matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
matrix_rec_list[i]->node_status_list = (t_node_status_rec **) pg_malloc0(sizeof(t_node_status_rec) * nodes.node_count);
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
@@ -1071,7 +1128,7 @@ build_cluster_matrix(t_node_matrix_rec ***matrix_rec_dest, int *name_length, Ite
|
|||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, ItemList *warnings, int *error_code)
|
build_cluster_crosscheck(t_node_status_cube ***dest_cube, ItemList *warnings, int *error_code)
|
||||||
{
|
{
|
||||||
PGconn *conn = NULL;
|
PGconn *conn = NULL;
|
||||||
int h,
|
int h,
|
||||||
@@ -1120,20 +1177,12 @@ build_cluster_crosscheck(t_node_status_cube ***dest_cube, int *name_length, Item
|
|||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
for (cell = nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
int name_length_cur = 0;
|
|
||||||
NodeInfoListCell *cell_i = NULL;
|
NodeInfoListCell *cell_i = NULL;
|
||||||
|
|
||||||
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
cube[h] = (t_node_status_cube *) pg_malloc(sizeof(t_node_status_cube));
|
||||||
cube[h]->node_id = cell->node_info->node_id;
|
cube[h]->node_id = cell->node_info->node_id;
|
||||||
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cube[h]->node_name));
|
strncpy(cube[h]->node_name, cell->node_info->node_name, sizeof(cube[h]->node_name));
|
||||||
|
|
||||||
/*
|
|
||||||
* Find the maximum length of a node name
|
|
||||||
*/
|
|
||||||
name_length_cur = strlen(cube[h]->node_name);
|
|
||||||
if (name_length_cur > *name_length)
|
|
||||||
*name_length = name_length_cur;
|
|
||||||
|
|
||||||
cube[h]->matrix_list_rec = (t_node_matrix_rec **) pg_malloc(sizeof(t_node_matrix_rec) * nodes.node_count);
|
cube[h]->matrix_list_rec = (t_node_matrix_rec **) pg_malloc(sizeof(t_node_matrix_rec) * nodes.node_count);
|
||||||
|
|
||||||
i = 0;
|
i = 0;
|
||||||
@@ -1396,6 +1445,10 @@ do_cluster_cleanup(void)
|
|||||||
log_warning(_("unable to vacuum table \"repmgr.monitoring_history\""));
|
log_warning(_("unable to vacuum table \"repmgr.monitoring_history\""));
|
||||||
log_detail("%s", PQerrorMessage(primary_conn));
|
log_detail("%s", PQerrorMessage(primary_conn));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info(_("vacuum of table \"repmgr.monitoring_history\" completed"));
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.keep_history == 0)
|
if (runtime_options.keep_history == 0)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-cluster.h
|
* repmgr-action-cluster.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* repmgr-action-daemon.c
|
* repmgr-action-daemon.c
|
||||||
*
|
*
|
||||||
* Implements repmgrd actions for the repmgr command line utility
|
* Implements repmgrd actions for the repmgr command line utility
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-daemon.h
|
* repmgr-action-daemon.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements actions available for any kind of node
|
* Implements actions available for any kind of node
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -43,13 +43,14 @@ static void _do_node_restore_config(void);
|
|||||||
|
|
||||||
static void do_node_check_replication_connection(void);
|
static void do_node_check_replication_connection(void);
|
||||||
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
|
static CheckStatus do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
|
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
||||||
/*
|
/*
|
||||||
* NODE STATUS
|
* NODE STATUS
|
||||||
*
|
*
|
||||||
@@ -159,7 +160,6 @@ do_node_status(void)
|
|||||||
_("- node is registered as standby but running as primary"));
|
_("- node is registered as standby but running as primary"));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case BDR:
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -204,7 +204,16 @@ do_node_status(void)
|
|||||||
|
|
||||||
if (enabled == false && recovery_type == RECTYPE_STANDBY)
|
if (enabled == false && recovery_type == RECTYPE_STANDBY)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(&archiving_status, " (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
|
if (PQserverVersion(conn) >= 90500)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&archiving_status,
|
||||||
|
" (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&archiving_status,
|
||||||
|
" (\"archive_mode\" has no effect on standbys)");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
key_value_list_set(&node_status,
|
key_value_list_set(&node_status,
|
||||||
@@ -294,7 +303,7 @@ do_node_status(void)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_downstream_node_attached(conn, node_cell->node_info->node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(conn, node_cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
missing_nodes_count++;
|
missing_nodes_count++;
|
||||||
item_list_append_format(&missing_nodes,
|
item_list_append_format(&missing_nodes,
|
||||||
@@ -717,10 +726,49 @@ do_node_check(void)
|
|||||||
exit(SUCCESS);
|
exit(SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strlen(config_file_options.conninfo))
|
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
if (config_file_options.conninfo[0] != '\0')
|
||||||
|
{
|
||||||
|
t_conninfo_param_list node_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||||
|
char *errmsg = NULL;
|
||||||
|
bool parse_success = false;
|
||||||
|
|
||||||
|
initialize_conninfo_params(&node_conninfo, false);
|
||||||
|
|
||||||
|
parse_success = parse_conninfo_string(config_file_options.conninfo,
|
||||||
|
&node_conninfo,
|
||||||
|
&errmsg, false);
|
||||||
|
|
||||||
|
if (parse_success == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to parse conninfo string \"%s\" for local node"),
|
||||||
|
config_file_options.conninfo);
|
||||||
|
log_detail("%s", errmsg);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If --superuser option provided, attempt to connect as the specified user
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (runtime_options.superuser[0] != '\0')
|
||||||
|
{
|
||||||
|
conn = establish_db_connection_with_replacement_param(
|
||||||
|
config_file_options.conninfo,
|
||||||
|
"user",
|
||||||
|
runtime_options.superuser,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
conn = establish_db_connection_by_params(&node_conninfo, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
conn = establish_db_connection_by_params(&source_conninfo, true);
|
conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||||
|
}
|
||||||
|
|
||||||
if (get_node_record(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
if (get_node_record(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
@@ -744,15 +792,25 @@ do_node_check(void)
|
|||||||
exit(return_code);
|
exit(return_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.downstream == true)
|
if (runtime_options.upstream == true)
|
||||||
{
|
{
|
||||||
return_code = do_node_check_downstream(conn,
|
return_code = do_node_check_upstream(conn,
|
||||||
runtime_options.output_mode,
|
runtime_options.output_mode,
|
||||||
NULL);
|
&node_info,
|
||||||
|
NULL);
|
||||||
PQfinish(conn);
|
PQfinish(conn);
|
||||||
exit(return_code);
|
exit(return_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.downstream == true)
|
||||||
|
{
|
||||||
|
return_code = do_node_check_downstream(conn,
|
||||||
|
runtime_options.output_mode,
|
||||||
|
&node_info,
|
||||||
|
NULL);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(return_code);
|
||||||
|
}
|
||||||
|
|
||||||
if (runtime_options.replication_lag == true)
|
if (runtime_options.replication_lag == true)
|
||||||
{
|
{
|
||||||
@@ -804,6 +862,16 @@ do_node_check(void)
|
|||||||
exit(return_code);
|
exit(return_code);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (runtime_options.replication_config_owner == true)
|
||||||
|
{
|
||||||
|
return_code = do_node_check_replication_config_owner(conn,
|
||||||
|
runtime_options.output_mode,
|
||||||
|
&node_info,
|
||||||
|
NULL);
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(return_code);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (runtime_options.output_mode == OM_NAGIOS)
|
if (runtime_options.output_mode == OM_NAGIOS)
|
||||||
{
|
{
|
||||||
@@ -827,7 +895,10 @@ do_node_check(void)
|
|||||||
if (do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
||||||
issue_detected = true;
|
issue_detected = true;
|
||||||
|
|
||||||
if (do_node_check_downstream(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_upstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
|
issue_detected = true;
|
||||||
|
|
||||||
|
if (do_node_check_downstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
issue_detected = true;
|
issue_detected = true;
|
||||||
|
|
||||||
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
||||||
@@ -909,7 +980,6 @@ do_node_check_replication_connection(void)
|
|||||||
PGconn *repl_conn = NULL;
|
PGconn *repl_conn = NULL;
|
||||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||||
t_conninfo_param_list remote_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
||||||
PQExpBufferData output;
|
PQExpBufferData output;
|
||||||
|
|
||||||
|
|
||||||
@@ -939,17 +1009,8 @@ do_node_check_replication_connection(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
initialize_conninfo_params(&remote_conninfo, false);
|
repl_conn = establish_replication_connection_from_conninfo(node_record.conninfo,
|
||||||
parse_conninfo_string(node_record.conninfo, &remote_conninfo, NULL, false);
|
node_record.repluser);
|
||||||
|
|
||||||
if (strcmp(param_get(&remote_conninfo, "user"), node_record.repluser) != 0)
|
|
||||||
{
|
|
||||||
param_set(&remote_conninfo, "user", node_record.repluser);
|
|
||||||
param_set(&remote_conninfo, "dbname", "replication");
|
|
||||||
}
|
|
||||||
param_set(&remote_conninfo, "replication", "1");
|
|
||||||
|
|
||||||
repl_conn = establish_db_connection_by_params(&remote_conninfo, false);
|
|
||||||
|
|
||||||
if (PQstatus(repl_conn) != CONNECTION_OK)
|
if (PQstatus(repl_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -1132,7 +1193,7 @@ do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list
|
|||||||
|
|
||||||
|
|
||||||
static CheckStatus
|
static CheckStatus
|
||||||
do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
|
do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
{
|
{
|
||||||
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
||||||
NodeInfoListCell *cell = NULL;
|
NodeInfoListCell *cell = NULL;
|
||||||
@@ -1166,7 +1227,7 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (is_downstream_node_attached(conn, cell->node_info->node_name) != NODE_ATTACHED)
|
if (is_downstream_node_attached(conn, cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
||||||
{
|
{
|
||||||
missing_nodes_count++;
|
missing_nodes_count++;
|
||||||
item_list_append_format(&missing_nodes,
|
item_list_append_format(&missing_nodes,
|
||||||
@@ -1183,7 +1244,13 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (missing_nodes_count == 0)
|
if (node_info->type == WITNESS)
|
||||||
|
{
|
||||||
|
/* witness is not connecting to any upstream */
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("N/A - node is a witness"));
|
||||||
|
}
|
||||||
|
else if (missing_nodes_count == 0)
|
||||||
{
|
{
|
||||||
if (expected_nodes_count == 0)
|
if (expected_nodes_count == 0)
|
||||||
appendPQExpBufferStr(&details,
|
appendPQExpBufferStr(&details,
|
||||||
@@ -1298,6 +1365,104 @@ do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_ou
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static CheckStatus
|
||||||
|
do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
|
{
|
||||||
|
PGconn *upstream_conn = NULL;
|
||||||
|
t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
|
PQExpBufferData details;
|
||||||
|
|
||||||
|
CheckStatus status = CHECK_STATUS_OK;
|
||||||
|
|
||||||
|
if (mode == OM_CSV && list_output == NULL)
|
||||||
|
{
|
||||||
|
log_error(_("--csv output not provided with --upstream option"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
|
if (node_info->type == WITNESS)
|
||||||
|
{
|
||||||
|
/* witness is not connecting to any upstream */
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("N/A - node is a witness"));
|
||||||
|
}
|
||||||
|
else if (get_node_record(conn, node_info->upstream_node_id, &upstream_node_info) != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
if (get_recovery_type(conn) == RECTYPE_STANDBY)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("node \"%s\" (ID: %i) is a standby but no upstream record found"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(&details,
|
||||||
|
_("N/A - node is primary"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
upstream_conn = establish_db_connection(upstream_node_info.conninfo, true);
|
||||||
|
|
||||||
|
/* check our node is connected */
|
||||||
|
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) != NODE_ATTACHED)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("node \"%s\" (ID: %i) is not attached to expected upstream node \"%s\" (ID: %i)"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
|
upstream_node_info.node_id);
|
||||||
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("node \"%s\" (ID: %i) is attached to expected upstream node \"%s\" (ID: %i)"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
|
upstream_node_info.node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (mode)
|
||||||
|
{
|
||||||
|
case OM_NAGIOS:
|
||||||
|
{
|
||||||
|
printf("REPMGR_UPSTREAM_SERVER %s: %s | ",
|
||||||
|
output_check_status(status),
|
||||||
|
details.data);
|
||||||
|
}
|
||||||
|
case OM_TEXT:
|
||||||
|
if (list_output != NULL)
|
||||||
|
{
|
||||||
|
check_status_list_set(list_output,
|
||||||
|
"Upstream connection",
|
||||||
|
status,
|
||||||
|
details.data);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("%s (%s)\n",
|
||||||
|
output_check_status(status),
|
||||||
|
details.data);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&details);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static CheckStatus
|
static CheckStatus
|
||||||
do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
{
|
{
|
||||||
@@ -1552,34 +1717,6 @@ do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckS
|
|||||||
_("node is witness"));
|
_("node is witness"));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case BDR:
|
|
||||||
{
|
|
||||||
PQExpBufferData output;
|
|
||||||
|
|
||||||
initPQExpBuffer(&output);
|
|
||||||
if (is_bdr_db(conn, &output) == false)
|
|
||||||
{
|
|
||||||
status = CHECK_STATUS_CRITICAL;
|
|
||||||
appendPQExpBufferStr(&details,
|
|
||||||
output.data);
|
|
||||||
}
|
|
||||||
termPQExpBuffer(&output);
|
|
||||||
|
|
||||||
if (status == CHECK_STATUS_OK)
|
|
||||||
{
|
|
||||||
if (is_active_bdr_node(conn, node_info->node_name) == false)
|
|
||||||
{
|
|
||||||
status = CHECK_STATUS_CRITICAL;
|
|
||||||
appendPQExpBufferStr(&details,
|
|
||||||
_("node is not an active BDR node"));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
appendPQExpBufferStr(&details,
|
|
||||||
_("node is an active BDR node"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -1828,11 +1965,11 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
|||||||
}
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&details);
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check actual data directory matches that in repmgr.conf; note this requires
|
* Check actual data directory matches that in repmgr.conf; note this requires
|
||||||
* a superuser connection
|
* a superuser connection
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (connection_has_pg_settings(conn) == true)
|
if (connection_has_pg_settings(conn) == true)
|
||||||
{
|
{
|
||||||
/* we expect to have a database connection */
|
/* we expect to have a database connection */
|
||||||
@@ -1875,10 +2012,9 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
|||||||
{
|
{
|
||||||
log_info(_("connection is not a superuser connection, falling back to simple check"));
|
log_info(_("connection is not a superuser connection, falling back to simple check"));
|
||||||
|
|
||||||
/* XXX add -S/--superuser option */
|
|
||||||
if (PQserverVersion(conn) >= 100000)
|
if (PQserverVersion(conn) >= 100000)
|
||||||
{
|
{
|
||||||
log_hint(_("add the \"%s\" user to group \"pg_read_all_settings\" or \"pg_monitor\""),
|
log_hint(_("provide a superuser with -S/--superuser, or add the \"%s\" user to role \"pg_read_all_settings\" or \"pg_monitor\""),
|
||||||
PQuser(conn));
|
PQuser(conn));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1899,6 +2035,12 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
|||||||
|
|
||||||
status = CHECK_STATUS_CRITICAL;
|
status = CHECK_STATUS_CRITICAL;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(&details,
|
||||||
|
_("configured \"data_directory\" is \"%s\""),
|
||||||
|
config_file_options.data_directory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (mode)
|
switch (mode)
|
||||||
@@ -1942,6 +2084,40 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is not included in the general list output
|
||||||
|
*/
|
||||||
|
static
|
||||||
|
CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
||||||
|
{
|
||||||
|
CheckStatus status = CHECK_STATUS_OK;
|
||||||
|
|
||||||
|
PQExpBufferData errmsg;
|
||||||
|
PQExpBufferData details;
|
||||||
|
|
||||||
|
if (mode != OM_OPTFORMAT)
|
||||||
|
{
|
||||||
|
log_error(_("--replication-config-owner option can only be used with --optformat"));
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
initPQExpBuffer(&errmsg);
|
||||||
|
initPQExpBuffer(&details);
|
||||||
|
|
||||||
|
if (check_replication_config_owner(PQserverVersion(conn),
|
||||||
|
config_file_options.data_directory,
|
||||||
|
&errmsg, &details) == false)
|
||||||
|
{
|
||||||
|
status = CHECK_STATUS_CRITICAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("--replication-config-owner=%s\n",
|
||||||
|
output_check_status(status));
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
do_node_service(void)
|
do_node_service(void)
|
||||||
@@ -1981,26 +2157,62 @@ do_node_service(void)
|
|||||||
|
|
||||||
if ((action == ACTION_STOP || action == ACTION_RESTART) && runtime_options.checkpoint == true)
|
if ((action == ACTION_STOP || action == ACTION_RESTART) && runtime_options.checkpoint == true)
|
||||||
{
|
{
|
||||||
if (runtime_options.dry_run == true)
|
PGconn *conn = NULL;
|
||||||
|
|
||||||
|
if (config_file_options.conninfo[0] != '\0')
|
||||||
{
|
{
|
||||||
log_info(_("a CHECKPOINT would be issued here"));
|
/*
|
||||||
|
* If --superuser option provided, attempt to connect as the specified user
|
||||||
|
*/
|
||||||
|
if (runtime_options.superuser[0] != '\0')
|
||||||
|
{
|
||||||
|
conn = establish_db_connection_with_replacement_param(
|
||||||
|
config_file_options.conninfo,
|
||||||
|
"user",
|
||||||
|
runtime_options.superuser,
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PGconn *conn = NULL;
|
conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||||
|
|
||||||
if (strlen(config_file_options.conninfo))
|
|
||||||
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
else
|
|
||||||
conn = establish_db_connection_by_params(&source_conninfo, true);
|
|
||||||
|
|
||||||
log_notice(_("issuing CHECKPOINT"));
|
|
||||||
|
|
||||||
/* check superuser conn! */
|
|
||||||
checkpoint(conn);
|
|
||||||
|
|
||||||
PQfinish(conn);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_superuser_connection(conn, NULL) == false)
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_warning(_("a CHECKPOINT would be issued here but no superuser connection is available"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("a superuser connection is required to issue a CHECKPOINT"));
|
||||||
|
}
|
||||||
|
|
||||||
|
log_hint(_("provide a superuser with -S/--superuser"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("a CHECKPOINT would be issued here"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
|
||||||
|
log_notice(_("issuing CHECKPOINT on node \"%s\" (ID: %i) "),
|
||||||
|
config_file_options.node_name,
|
||||||
|
config_file_options.node_id);
|
||||||
|
|
||||||
|
checkpoint(conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PQfinish(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
get_server_action(action, command, data_dir);
|
get_server_action(action, command, data_dir);
|
||||||
@@ -2147,6 +2359,7 @@ do_node_rejoin(void)
|
|||||||
PQExpBufferData follow_output;
|
PQExpBufferData follow_output;
|
||||||
struct stat statbuf;
|
struct stat statbuf;
|
||||||
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
int follow_error_code = SUCCESS;
|
int follow_error_code = SUCCESS;
|
||||||
@@ -2257,6 +2470,36 @@ do_node_rejoin(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fetch the local node record - we'll need this later, and it acts as an
|
||||||
|
* additional sanity-check that the node is known to the primary.
|
||||||
|
*/
|
||||||
|
if (get_node_record(primary_conn, config_file_options.node_id, &local_node_record) != RECORD_FOUND)
|
||||||
|
{
|
||||||
|
log_error(_("unable to retrieve node record for the local node"));
|
||||||
|
log_hint(_("check the local node is registered with the current primary \"%s\" (ID: %i)"),
|
||||||
|
primary_node_record.node_name,
|
||||||
|
primary_node_record.node_id);
|
||||||
|
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity-check replication slot availability
|
||||||
|
*/
|
||||||
|
if (config_file_options.use_replication_slots)
|
||||||
|
{
|
||||||
|
bool slots_available = check_replication_slots_available(primary_node_record.node_id,
|
||||||
|
primary_conn);
|
||||||
|
if (slots_available == false)
|
||||||
|
{
|
||||||
|
PQfinish(primary_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* sanity-check that it will actually be possible to stream from the new upstream
|
* sanity-check that it will actually be possible to stream from the new upstream
|
||||||
*/
|
*/
|
||||||
@@ -2534,86 +2777,41 @@ do_node_rejoin(void)
|
|||||||
*/
|
*/
|
||||||
if (runtime_options.no_wait == false)
|
if (runtime_options.no_wait == false)
|
||||||
{
|
{
|
||||||
int i;
|
standy_join_status join_success = check_standby_join(primary_conn,
|
||||||
|
&primary_node_record,
|
||||||
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
&local_node_record);
|
||||||
{
|
|
||||||
if (is_server_available(config_file_options.conninfo))
|
|
||||||
{
|
|
||||||
log_verbose(LOG_INFO, _("demoted primary is pingable"));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i % 5 == 0)
|
|
||||||
{
|
|
||||||
log_verbose(LOG_INFO, _("waiting for node %i to respond to pings; %i of max %i attempts"),
|
|
||||||
config_file_options.node_id,
|
|
||||||
i + 1, config_file_options.node_rejoin_timeout);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_debug("sleeping 1 second waiting for node %i to respond to pings; %i of max %i attempts",
|
|
||||||
config_file_options.node_id,
|
|
||||||
i + 1, config_file_options.node_rejoin_timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; i < config_file_options.node_rejoin_timeout; i++)
|
|
||||||
{
|
|
||||||
NodeAttached node_attached = is_downstream_node_attached(primary_conn,
|
|
||||||
config_file_options.node_name);
|
|
||||||
|
|
||||||
if (node_attached == NODE_ATTACHED)
|
|
||||||
{
|
|
||||||
log_verbose(LOG_INFO, _("node %i has attached to its upstream node"),
|
|
||||||
config_file_options.node_id);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i % 5 == 0)
|
|
||||||
{
|
|
||||||
log_info(_("waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts"),
|
|
||||||
config_file_options.node_name,
|
|
||||||
config_file_options.node_id,
|
|
||||||
i + 1, config_file_options.node_rejoin_timeout);
|
|
||||||
log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
|
|
||||||
primary_node_record.node_name,
|
|
||||||
config_file_options.node_name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_debug("sleeping 1 second waiting for node %i to connect to new primary; %i of max %i attempts",
|
|
||||||
config_file_options.node_id,
|
|
||||||
i + 1, config_file_options.node_rejoin_timeout);
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
create_event_notification(primary_conn,
|
create_event_notification(primary_conn,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
config_file_options.node_id,
|
config_file_options.node_id,
|
||||||
"node_rejoin",
|
"node_rejoin",
|
||||||
success,
|
join_success == JOIN_SUCCESS ? true : false,
|
||||||
follow_output.data);
|
follow_output.data);
|
||||||
|
|
||||||
if (success == false)
|
if (join_success != JOIN_SUCCESS)
|
||||||
{
|
{
|
||||||
termPQExpBuffer(&follow_output);
|
termPQExpBuffer(&follow_output);
|
||||||
log_error(_("NODE REJOIN failed"));
|
log_error(_("NODE REJOIN failed"));
|
||||||
log_detail(_("no record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
|
||||||
config_file_options.node_name,
|
if (join_success == JOIN_FAIL_NO_PING) {
|
||||||
primary_node_record.node_name);
|
log_detail(_("local node \"%s\" did not become available start after %i seconds"),
|
||||||
|
config_file_options.node_name,
|
||||||
|
config_file_options.node_rejoin_timeout);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
log_detail(_("no active record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
||||||
|
config_file_options.node_name,
|
||||||
|
primary_node_record.node_name);
|
||||||
|
}
|
||||||
log_hint(_("check the PostgreSQL log on the local node"));
|
log_hint(_("check the PostgreSQL log on the local node"));
|
||||||
|
|
||||||
exit(ERR_REJOIN_FAIL);
|
exit(ERR_REJOIN_FAIL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* -W/--no-wait provided - check once */
|
/* -W/--no-wait provided - check once */
|
||||||
NodeAttached node_attached = is_downstream_node_attached(primary_conn, config_file_options.node_name);
|
NodeAttached node_attached = is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL);
|
||||||
if (node_attached == NODE_ATTACHED)
|
if (node_attached == NODE_ATTACHED)
|
||||||
success = true;
|
success = true;
|
||||||
}
|
}
|
||||||
@@ -3173,17 +3371,22 @@ do_node_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
|
printf(_(" Connection options:\n"));
|
||||||
printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
|
printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
|
||||||
|
puts("");
|
||||||
|
printf(_(" Output options:\n"));
|
||||||
|
printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
|
||||||
|
printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" Following options check an individual status:\n"));
|
printf(_(" Following options check an individual status:\n"));
|
||||||
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
|
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
|
||||||
printf(_(" --downstream whether all downstream nodes are connected\n"));
|
printf(_(" --downstream whether all downstream nodes are connected\n"));
|
||||||
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
|
printf(_(" --uptream whether the node is connected to its upstream\n"));
|
||||||
printf(_(" --role check node has expected role\n"));
|
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
|
||||||
printf(_(" --slots check for inactive replication slots\n"));
|
printf(_(" --role check node has expected role\n"));
|
||||||
printf(_(" --missing-slots check for missing replication slots\n"));
|
printf(_(" --slots check for inactive replication slots\n"));
|
||||||
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
printf(_(" --missing-slots check for missing replication slots\n"));
|
||||||
|
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
@@ -3202,7 +3405,7 @@ do_node_help(void)
|
|||||||
" after executing \"pg_rewind\"\n"));
|
" after executing \"pg_rewind\"\n"));
|
||||||
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
|
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
|
||||||
" (default: /tmp)\n"));
|
" (default: /tmp)\n"));
|
||||||
printf(_(" -W/--no-wait don't wait for the node to rejoin cluster\n"));
|
printf(_(" -W, --no-wait don't wait for the node to rejoin cluster\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_("NODE SERVICE\n"));
|
printf(_("NODE SERVICE\n"));
|
||||||
@@ -3212,10 +3415,11 @@ do_node_help(void)
|
|||||||
puts("");
|
puts("");
|
||||||
printf(_(" Configuration file required, runs on local node only.\n"));
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
||||||
puts("");
|
puts("");
|
||||||
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
||||||
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
||||||
printf(_(" --list-actions show what command would be performed for each action\n"));
|
printf(_(" --list-actions show what command would be performed for each action\n"));
|
||||||
printf(_(" --checkpoint issue a CHECKPOINT before stopping or restarting the node\n"));
|
printf(_(" --checkpoint issue a CHECKPOINT before stopping or restarting the node\n"));
|
||||||
|
printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-node.h
|
* repmgr-action-node.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements primary actions for the repmgr command line utility
|
* Implements primary actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -106,7 +106,7 @@ do_primary_register(void)
|
|||||||
current_primary_id = get_primary_node_id(conn);
|
current_primary_id = get_primary_node_id(conn);
|
||||||
if (current_primary_id != NODE_NOT_FOUND && current_primary_id != config_file_options.node_id)
|
if (current_primary_id != NODE_NOT_FOUND && current_primary_id != config_file_options.node_id)
|
||||||
{
|
{
|
||||||
log_debug("XXX %i", current_primary_id);
|
log_debug("current active primary node ID is %i", current_primary_id);
|
||||||
primary_conn = establish_primary_db_connection(conn, false);
|
primary_conn = establish_primary_db_connection(conn, false);
|
||||||
|
|
||||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
if (PQstatus(primary_conn) == CONNECTION_OK)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-primary.h
|
* repmgr-action-primary.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
* repmgr-action-service.c
|
* repmgr-action-service.c
|
||||||
*
|
*
|
||||||
* Implements repmgrd actions for the repmgr command line utility
|
* Implements repmgrd actions for the repmgr command line utility
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -357,11 +357,22 @@ do_service_status(void)
|
|||||||
{
|
{
|
||||||
ItemListCell *cell = NULL;
|
ItemListCell *cell = NULL;
|
||||||
|
|
||||||
printf(_("\nWARNING: following issues were detected\n"));
|
PQExpBufferData warning;
|
||||||
|
|
||||||
|
initPQExpBuffer(&warning);
|
||||||
|
|
||||||
|
appendPQExpBufferStr(&warning,
|
||||||
|
_("following issues were detected\n"));
|
||||||
|
|
||||||
for (cell = warnings.head; cell; cell = cell->next)
|
for (cell = warnings.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
printf(_(" - %s\n"), cell->string);
|
appendPQExpBuffer(&warning,
|
||||||
|
_(" - %s\n"), cell->string);
|
||||||
}
|
}
|
||||||
|
puts("");
|
||||||
|
log_warning("%s", warning.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&warning);
|
||||||
|
|
||||||
if (runtime_options.verbose == false && connection_error_found == true)
|
if (runtime_options.verbose == false && connection_error_found == true)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-service.h
|
* repmgr-action-service.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-standby.h
|
* repmgr-action-standby.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Implements witness actions for the repmgr command line utility
|
* Implements witness actions for the repmgr command line utility
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -74,18 +74,6 @@ do_witness_register(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check that witness node is not a BDR node */
|
|
||||||
if (is_bdr_db_quiet(witness_conn) == true)
|
|
||||||
{
|
|
||||||
log_error(_("witness node is a BDR node"));
|
|
||||||
log_hint(_("a witness node cannot be configured for a BDR cluster"));
|
|
||||||
|
|
||||||
PQfinish(witness_conn);
|
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* connect to primary with provided parameters */
|
/* connect to primary with provided parameters */
|
||||||
log_info(_("connecting to primary node"));
|
log_info(_("connecting to primary node"));
|
||||||
|
|
||||||
@@ -194,19 +182,6 @@ do_witness_register(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check that primary node is not a BDR node */
|
|
||||||
if (is_bdr_db_quiet(primary_conn) == true)
|
|
||||||
{
|
|
||||||
log_error(_("primary node is a BDR node"));
|
|
||||||
log_hint(_("a witness node cannot be configured for a BDR cluster"));
|
|
||||||
|
|
||||||
PQfinish(witness_conn);
|
|
||||||
PQfinish(primary_conn);
|
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* create repmgr extension, if does not exist */
|
/* create repmgr extension, if does not exist */
|
||||||
if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
|
if (runtime_options.dry_run == false && !create_repmgr_extension(witness_conn))
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-action-witness.h
|
* repmgr-action-witness.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-client-global.h
|
* repmgr-client-global.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -28,6 +28,8 @@
|
|||||||
/* default value for "cluster event --limit"*/
|
/* default value for "cluster event --limit"*/
|
||||||
#define CLUSTER_EVENT_LIMIT 20
|
#define CLUSTER_EVENT_LIMIT 20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
/* configuration metadata */
|
/* configuration metadata */
|
||||||
@@ -85,7 +87,7 @@ typedef struct
|
|||||||
char replication_user[MAXLEN];
|
char replication_user[MAXLEN];
|
||||||
char upstream_conninfo[MAXLEN];
|
char upstream_conninfo[MAXLEN];
|
||||||
bool without_barman;
|
bool without_barman;
|
||||||
bool recovery_conf_only;
|
bool replication_conf_only;
|
||||||
|
|
||||||
/* "standby clone"/"standby follow" options */
|
/* "standby clone"/"standby follow" options */
|
||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
@@ -109,6 +111,7 @@ typedef struct
|
|||||||
/* "node check" options */
|
/* "node check" options */
|
||||||
bool archive_ready;
|
bool archive_ready;
|
||||||
bool downstream;
|
bool downstream;
|
||||||
|
bool upstream;
|
||||||
bool replication_lag;
|
bool replication_lag;
|
||||||
bool role;
|
bool role;
|
||||||
bool slots;
|
bool slots;
|
||||||
@@ -116,6 +119,7 @@ typedef struct
|
|||||||
bool has_passfile;
|
bool has_passfile;
|
||||||
bool replication_connection;
|
bool replication_connection;
|
||||||
bool data_directory_config;
|
bool data_directory_config;
|
||||||
|
bool replication_config_owner;
|
||||||
|
|
||||||
/* "node rejoin" options */
|
/* "node rejoin" options */
|
||||||
char config_files[MAXLEN];
|
char config_files[MAXLEN];
|
||||||
@@ -168,7 +172,7 @@ typedef struct
|
|||||||
/* "node status" options */ \
|
/* "node status" options */ \
|
||||||
false, \
|
false, \
|
||||||
/* "node check" options */ \
|
/* "node check" options */ \
|
||||||
false, false, false, false, false, false, false, false, false, \
|
false, false, false, false, false, false, false, false, false, false, false, \
|
||||||
/* "node rejoin" options */ \
|
/* "node rejoin" options */ \
|
||||||
"", \
|
"", \
|
||||||
/* "node service" options */ \
|
/* "node service" options */ \
|
||||||
@@ -201,6 +205,23 @@ typedef enum
|
|||||||
} t_server_action;
|
} t_server_action;
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
USER_TYPE_UNKNOWN = -1,
|
||||||
|
REPMGR_USER,
|
||||||
|
REPLICATION_USER_OPT,
|
||||||
|
REPLICATION_USER_NODE,
|
||||||
|
SUPERUSER
|
||||||
|
} t_user_type;
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
JOIN_SUCCESS,
|
||||||
|
JOIN_FAIL_NO_PING,
|
||||||
|
JOIN_FAIL_NO_REPLICATION
|
||||||
|
} standy_join_status;
|
||||||
|
|
||||||
|
|
||||||
typedef struct ColHeader
|
typedef struct ColHeader
|
||||||
{
|
{
|
||||||
char title[MAXLEN];
|
char title[MAXLEN];
|
||||||
@@ -255,9 +276,15 @@ extern void get_node_config_directory(char *config_dir_buf);
|
|||||||
extern void get_node_data_directory(char *data_dir_buf);
|
extern void get_node_data_directory(char *data_dir_buf);
|
||||||
extern void init_node_record(t_node_info *node_record);
|
extern void init_node_record(t_node_info *node_record);
|
||||||
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
extern bool can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *reason);
|
||||||
extern void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
|
||||||
|
|
||||||
|
extern bool create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg);
|
||||||
|
extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||||
|
|
||||||
|
extern standy_join_status check_standby_join(PGconn *primary_conn, t_node_info *primary_node_record, t_node_info *standby_node_record);
|
||||||
|
extern bool check_replication_slots_available(int node_id, PGconn* conn);
|
||||||
extern bool check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin);
|
extern bool check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin);
|
||||||
|
extern bool check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg);
|
||||||
|
|
||||||
extern void check_shared_library(PGconn *conn);
|
extern void check_shared_library(PGconn *conn);
|
||||||
extern bool is_repmgrd_running(PGconn *conn);
|
extern bool is_repmgrd_running(PGconn *conn);
|
||||||
extern int parse_repmgr_version(const char *version_string);
|
extern int parse_repmgr_version(const char *version_string);
|
||||||
|
|||||||
707
repmgr-client.c
707
repmgr-client.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-client.c - Command interpreter for the repmgr package
|
* repmgr-client.c - Command interpreter for the repmgr package
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This module is a command-line utility to easily setup a cluster of
|
* This module is a command-line utility to easily setup a cluster of
|
||||||
* hot standby servers for an HA environment
|
* hot standby servers for an HA environment
|
||||||
@@ -18,9 +18,6 @@
|
|||||||
* STANDBY FOLLOW
|
* STANDBY FOLLOW
|
||||||
* STANDBY SWITCHOVER
|
* STANDBY SWITCHOVER
|
||||||
*
|
*
|
||||||
* BDR REGISTER
|
|
||||||
* BDR UNREGISTER
|
|
||||||
*
|
|
||||||
* CLUSTER SHOW
|
* CLUSTER SHOW
|
||||||
* CLUSTER EVENT
|
* CLUSTER EVENT
|
||||||
* CLUSTER CROSSCHECK
|
* CLUSTER CROSSCHECK
|
||||||
@@ -67,7 +64,6 @@
|
|||||||
#include "repmgr-action-primary.h"
|
#include "repmgr-action-primary.h"
|
||||||
#include "repmgr-action-standby.h"
|
#include "repmgr-action-standby.h"
|
||||||
#include "repmgr-action-witness.h"
|
#include "repmgr-action-witness.h"
|
||||||
#include "repmgr-action-bdr.h"
|
|
||||||
#include "repmgr-action-node.h"
|
#include "repmgr-action-node.h"
|
||||||
#include "repmgr-action-cluster.h"
|
#include "repmgr-action-cluster.h"
|
||||||
#include "repmgr-action-service.h"
|
#include "repmgr-action-service.h"
|
||||||
@@ -95,11 +91,17 @@ char path_buf[MAXLEN] = "";
|
|||||||
*/
|
*/
|
||||||
t_node_info target_node_info = T_NODE_INFO_INITIALIZER;
|
t_node_info target_node_info = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
|
/* used by create_replication_slot() */
|
||||||
|
static t_user_type ReplicationSlotUser = USER_TYPE_UNKNOWN;
|
||||||
|
|
||||||
/* Collate command line errors and warnings here for friendlier reporting */
|
/* Collate command line errors and warnings here for friendlier reporting */
|
||||||
static ItemList cli_errors = {NULL, NULL};
|
static ItemList cli_errors = {NULL, NULL};
|
||||||
static ItemList cli_warnings = {NULL, NULL};
|
static ItemList cli_warnings = {NULL, NULL};
|
||||||
|
|
||||||
|
static void _determine_replication_slot_user(PGconn *conn,
|
||||||
|
t_node_info *upstream_node_record,
|
||||||
|
char **replication_user);
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
@@ -174,9 +176,8 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/* set default user for -R/--remote-user */
|
/* set default user for -R/--remote-user */
|
||||||
{
|
{
|
||||||
struct passwd *pw = NULL;
|
struct passwd *pw = getpwuid(geteuid());
|
||||||
|
|
||||||
pw = getpwuid(geteuid());
|
|
||||||
if (pw == NULL)
|
if (pw == NULL)
|
||||||
{
|
{
|
||||||
fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
|
fprintf(stderr, _("could not get current user name: %s\n"), strerror(errno));
|
||||||
@@ -287,8 +288,7 @@ main(int argc, char **argv)
|
|||||||
* These are the standard database connection options; with
|
* These are the standard database connection options; with
|
||||||
* the exception of -d/--dbname (which could be a conninfo
|
* the exception of -d/--dbname (which could be a conninfo
|
||||||
* string) we'll also set these values in "source_conninfo"
|
* string) we'll also set these values in "source_conninfo"
|
||||||
* (overwriting preset values from environment variables). XXX
|
* (overwriting preset values from environment variables).
|
||||||
* check this is same as psql
|
|
||||||
*/
|
*/
|
||||||
/* -d/--dbname */
|
/* -d/--dbname */
|
||||||
case 'd':
|
case 'd':
|
||||||
@@ -431,8 +431,8 @@ main(int argc, char **argv)
|
|||||||
runtime_options.without_barman = true;
|
runtime_options.without_barman = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OPT_RECOVERY_CONF_ONLY:
|
case OPT_REPLICATION_CONF_ONLY:
|
||||||
runtime_options.recovery_conf_only = true;
|
runtime_options.replication_conf_only = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
||||||
@@ -505,6 +505,10 @@ main(int argc, char **argv)
|
|||||||
runtime_options.downstream = true;
|
runtime_options.downstream = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_UPSTREAM:
|
||||||
|
runtime_options.upstream = true;
|
||||||
|
break;
|
||||||
|
|
||||||
case OPT_REPLICATION_LAG:
|
case OPT_REPLICATION_LAG:
|
||||||
runtime_options.replication_lag = true;
|
runtime_options.replication_lag = true;
|
||||||
break;
|
break;
|
||||||
@@ -533,6 +537,10 @@ main(int argc, char **argv)
|
|||||||
runtime_options.data_directory_config = true;
|
runtime_options.data_directory_config = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_REPLICATION_CONFIG_OWNER:
|
||||||
|
runtime_options.replication_config_owner = true;
|
||||||
|
break;
|
||||||
|
|
||||||
/*--------------------
|
/*--------------------
|
||||||
* "node rejoin" options
|
* "node rejoin" options
|
||||||
*--------------------
|
*--------------------
|
||||||
@@ -812,7 +820,6 @@ main(int argc, char **argv)
|
|||||||
* { PRIMARY | MASTER } REGISTER |
|
* { PRIMARY | MASTER } REGISTER |
|
||||||
* STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
|
* STANDBY { REGISTER | UNREGISTER | CLONE [node] | PROMOTE | FOLLOW [node] | SWITCHOVER } |
|
||||||
* WITNESS { CREATE | REGISTER | UNREGISTER }
|
* WITNESS { CREATE | REGISTER | UNREGISTER }
|
||||||
* BDR { REGISTER | UNREGISTER } |
|
|
||||||
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
* NODE { STATUS | CHECK | REJOIN | SERVICE } |
|
||||||
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
* CLUSTER { CROSSCHECK | MATRIX | SHOW | EVENT | CLEANUP }
|
||||||
* SERVICE { STATUS | PAUSE | UNPAUSE | START | STOP }
|
* SERVICE { STATUS | PAUSE | UNPAUSE | START | STOP }
|
||||||
@@ -882,6 +889,7 @@ main(int argc, char **argv)
|
|||||||
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
||||||
action = NODE_STATUS;
|
action = NODE_STATUS;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (strcasecmp(repmgr_command, "WITNESS") == 0)
|
else if (strcasecmp(repmgr_command, "WITNESS") == 0)
|
||||||
{
|
{
|
||||||
if (help_option == true)
|
if (help_option == true)
|
||||||
@@ -894,23 +902,6 @@ main(int argc, char **argv)
|
|||||||
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
|
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
|
||||||
action = WITNESS_UNREGISTER;
|
action = WITNESS_UNREGISTER;
|
||||||
}
|
}
|
||||||
else if (strcasecmp(repmgr_command, "BDR") == 0)
|
|
||||||
{
|
|
||||||
if (help_option == true)
|
|
||||||
{
|
|
||||||
do_bdr_help();
|
|
||||||
exit(SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcasecmp(repmgr_action, "REGISTER") == 0)
|
|
||||||
action = BDR_REGISTER;
|
|
||||||
else if (strcasecmp(repmgr_action, "UNREGISTER") == 0)
|
|
||||||
action = BDR_UNREGISTER;
|
|
||||||
else if (strcasecmp(repmgr_action, "CHECK") == 0)
|
|
||||||
action = NODE_CHECK;
|
|
||||||
else if (strcasecmp(repmgr_action, "STATUS") == 0)
|
|
||||||
action = NODE_STATUS;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (strcasecmp(repmgr_command, "NODE") == 0)
|
else if (strcasecmp(repmgr_command, "NODE") == 0)
|
||||||
{
|
{
|
||||||
@@ -1205,7 +1196,7 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
* If --dry-run specified, ensure log_level is at least LOG_INFO, regardless
|
||||||
* of what's in the configuration file or -L/--log-level paremeter, otherwise
|
* of what's in the configuration file or -L/--log-level parameter, otherwise
|
||||||
* some or output might not be displayed.
|
* some or output might not be displayed.
|
||||||
*/
|
*/
|
||||||
if (runtime_options.dry_run == true)
|
if (runtime_options.dry_run == true)
|
||||||
@@ -1350,13 +1341,6 @@ main(int argc, char **argv)
|
|||||||
case WITNESS_UNREGISTER:
|
case WITNESS_UNREGISTER:
|
||||||
do_witness_unregister();
|
do_witness_unregister();
|
||||||
break;
|
break;
|
||||||
/* BDR */
|
|
||||||
case BDR_REGISTER:
|
|
||||||
do_bdr_register();
|
|
||||||
break;
|
|
||||||
case BDR_UNREGISTER:
|
|
||||||
do_bdr_unregister();
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* NODE */
|
/* NODE */
|
||||||
case NODE_STATUS:
|
case NODE_STATUS:
|
||||||
@@ -1429,9 +1413,6 @@ main(int argc, char **argv)
|
|||||||
*
|
*
|
||||||
* Messages will be added to the command line warning and error lists
|
* Messages will be added to the command line warning and error lists
|
||||||
* as appropriate.
|
* as appropriate.
|
||||||
*
|
|
||||||
* XXX for each individual actions, check only required actions
|
|
||||||
* for non-required actions check warn if provided
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -1487,10 +1468,15 @@ check_cli_parameters(const int action)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXX if -D/--pgdata provided, and also
|
* If -D/--pgdata was provided, but config_file_options.pgdata
|
||||||
* config_file_options.pgdata, warn -D/--pgdata will be
|
* is set, warn that -D/--pgdata will be ignored.
|
||||||
* ignored
|
|
||||||
*/
|
*/
|
||||||
|
if (runtime_options.data_dir[0] && config_file_options.data_directory[0])
|
||||||
|
{
|
||||||
|
item_list_append(&cli_warnings,
|
||||||
|
_("-D/--pgdata will be ignored if a repmgr configuration file is provided"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (*runtime_options.upstream_conninfo)
|
if (*runtime_options.upstream_conninfo)
|
||||||
{
|
{
|
||||||
@@ -1674,7 +1660,6 @@ check_cli_parameters(const int action)
|
|||||||
case STANDBY_CLONE:
|
case STANDBY_CLONE:
|
||||||
case STANDBY_REGISTER:
|
case STANDBY_REGISTER:
|
||||||
case STANDBY_FOLLOW:
|
case STANDBY_FOLLOW:
|
||||||
case BDR_REGISTER:
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
item_list_append_format(&cli_warnings,
|
item_list_append_format(&cli_warnings,
|
||||||
@@ -1705,7 +1690,24 @@ check_cli_parameters(const int action)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime_options.recovery_conf_only == true)
|
if (runtime_options.superuser[0])
|
||||||
|
{
|
||||||
|
switch (action)
|
||||||
|
{
|
||||||
|
case STANDBY_CLONE:
|
||||||
|
case STANDBY_SWITCHOVER:
|
||||||
|
case NODE_CHECK:
|
||||||
|
case NODE_SERVICE:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
item_list_append_format(&cli_warnings,
|
||||||
|
_("--superuser ignored when executing %s"),
|
||||||
|
action_name(action));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (runtime_options.replication_conf_only == true)
|
||||||
{
|
{
|
||||||
switch (action)
|
switch (action)
|
||||||
{
|
{
|
||||||
@@ -2213,7 +2215,6 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
|
|
||||||
break;
|
break;
|
||||||
case WITNESS:
|
case WITNESS:
|
||||||
case BDR:
|
|
||||||
{
|
{
|
||||||
/* node is reachable */
|
/* node is reachable */
|
||||||
if (node_info->node_status == NODE_STATUS_UP)
|
if (node_info->node_status == NODE_STATUS_UP)
|
||||||
@@ -2273,8 +2274,9 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
* Unable to retrieve the node's copy of its own record - copy the
|
* Unable to retrieve the node's copy of its own record - copy the
|
||||||
* name from our own copy of the record
|
* name from our own copy of the record
|
||||||
*/
|
*/
|
||||||
appendPQExpBufferStr(upstream,
|
appendPQExpBuffer(upstream,
|
||||||
node_info->upstream_node_name);
|
"? %s",
|
||||||
|
node_info->upstream_node_name);
|
||||||
}
|
}
|
||||||
else if (remote_node_rec.type == WITNESS)
|
else if (remote_node_rec.type == WITNESS)
|
||||||
{
|
{
|
||||||
@@ -2349,6 +2351,7 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
* connected to the upstream
|
* connected to the upstream
|
||||||
*/
|
*/
|
||||||
NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
|
NodeAttached attached_to_upstream = NODE_ATTACHED_UNKNOWN;
|
||||||
|
char *replication_state = NULL;
|
||||||
t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
|
t_node_info upstream_node_rec = T_NODE_INFO_INITIALIZER;
|
||||||
RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
|
RecordStatus upstream_node_rec_found = get_node_record(node_info->conn,
|
||||||
node_info->upstream_node_id,
|
node_info->upstream_node_id,
|
||||||
@@ -2376,7 +2379,7 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name);
|
attached_to_upstream = is_downstream_node_attached(upstream_conn, node_info->node_name, &replication_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(upstream_conn);
|
PQfinish(upstream_conn);
|
||||||
@@ -2392,6 +2395,18 @@ format_node_status(t_node_info *node_info, PQExpBufferData *node_status, PQExpBu
|
|||||||
upstream_node_rec.node_name,
|
upstream_node_rec.node_name,
|
||||||
upstream_node_rec.node_id);
|
upstream_node_rec.node_id);
|
||||||
}
|
}
|
||||||
|
if (attached_to_upstream == NODE_NOT_ATTACHED)
|
||||||
|
{
|
||||||
|
appendPQExpBufferStr(upstream, "? ");
|
||||||
|
item_list_append_format(warnings,
|
||||||
|
"node \"%s\" (ID: %i) attached to its upstream node \"%s\" (ID: %i) in state \"%s\"",
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
upstream_node_rec.node_name,
|
||||||
|
upstream_node_rec.node_id,
|
||||||
|
replication_state);
|
||||||
|
}
|
||||||
|
|
||||||
else if (attached_to_upstream == NODE_DETACHED)
|
else if (attached_to_upstream == NODE_DETACHED)
|
||||||
{
|
{
|
||||||
appendPQExpBufferStr(upstream, "! ");
|
appendPQExpBufferStr(upstream, "! ");
|
||||||
@@ -2456,17 +2471,14 @@ action_name(const int action)
|
|||||||
return "STANDBY PROMOTE";
|
return "STANDBY PROMOTE";
|
||||||
case STANDBY_FOLLOW:
|
case STANDBY_FOLLOW:
|
||||||
return "STANDBY FOLLOW";
|
return "STANDBY FOLLOW";
|
||||||
|
case STANDBY_SWITCHOVER:
|
||||||
|
return "STANDBY SWITCHOVER";
|
||||||
|
|
||||||
case WITNESS_REGISTER:
|
case WITNESS_REGISTER:
|
||||||
return "WITNESS REGISTER";
|
return "WITNESS REGISTER";
|
||||||
case WITNESS_UNREGISTER:
|
case WITNESS_UNREGISTER:
|
||||||
return "WITNESS UNREGISTER";
|
return "WITNESS UNREGISTER";
|
||||||
|
|
||||||
case BDR_REGISTER:
|
|
||||||
return "BDR REGISTER";
|
|
||||||
case BDR_UNREGISTER:
|
|
||||||
return "BDR UNREGISTER";
|
|
||||||
|
|
||||||
case NODE_STATUS:
|
case NODE_STATUS:
|
||||||
return "NODE STATUS";
|
return "NODE STATUS";
|
||||||
case NODE_CHECK:
|
case NODE_CHECK:
|
||||||
@@ -2475,9 +2487,13 @@ action_name(const int action)
|
|||||||
return "NODE REJOIN";
|
return "NODE REJOIN";
|
||||||
case NODE_SERVICE:
|
case NODE_SERVICE:
|
||||||
return "NODE SERVICE";
|
return "NODE SERVICE";
|
||||||
|
case NODE_CONTROL:
|
||||||
|
return "NODE CONTROL";
|
||||||
|
|
||||||
case CLUSTER_SHOW:
|
case CLUSTER_SHOW:
|
||||||
return "CLUSTER SHOW";
|
return "CLUSTER SHOW";
|
||||||
|
case CLUSTER_CLEANUP:
|
||||||
|
return "CLUSTER CLEANUP";
|
||||||
case CLUSTER_EVENT:
|
case CLUSTER_EVENT:
|
||||||
return "CLUSTER EVENT";
|
return "CLUSTER EVENT";
|
||||||
case CLUSTER_MATRIX:
|
case CLUSTER_MATRIX:
|
||||||
@@ -2602,7 +2618,6 @@ do_help(void)
|
|||||||
printf(_("Usage:\n"));
|
printf(_("Usage:\n"));
|
||||||
printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
|
printf(_(" %s [OPTIONS] primary {register|unregister}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
|
printf(_(" %s [OPTIONS] standby {register|unregister|clone|promote|follow|switchover}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] bdr {register|unregister}\n"), progname());
|
|
||||||
printf(_(" %s [OPTIONS] node {status|check|rejoin|service}\n"), progname());
|
printf(_(" %s [OPTIONS] node {status|check|rejoin|service}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
|
printf(_(" %s [OPTIONS] cluster {show|event|matrix|crosscheck|cleanup}\n"), progname());
|
||||||
printf(_(" %s [OPTIONS] witness {register|unregister}\n"), progname());
|
printf(_(" %s [OPTIONS] witness {register|unregister}\n"), progname());
|
||||||
@@ -2611,7 +2626,7 @@ do_help(void)
|
|||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
printf(_(" Execute \"%s {primary|standby|bdr|node|cluster|witness|service} --help\" to see command-specific options\n"), progname());
|
printf(_(" Execute \"%s {primary|standby|node|cluster|witness|service} --help\" to see command-specific options\n"), progname());
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
@@ -2641,7 +2656,6 @@ do_help(void)
|
|||||||
|
|
||||||
printf(_(" -p, --port=PORT database server port (default: \"%s\")\n"), runtime_options.port);
|
printf(_(" -p, --port=PORT database server port (default: \"%s\")\n"), runtime_options.port);
|
||||||
printf(_(" -U, --username=USERNAME database user name to connect as (default: \"%s\")\n"), runtime_options.username);
|
printf(_(" -U, --username=USERNAME database user name to connect as (default: \"%s\")\n"), runtime_options.username);
|
||||||
printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
|
|
||||||
|
|
||||||
puts("");
|
puts("");
|
||||||
|
|
||||||
@@ -2735,14 +2749,7 @@ create_repmgr_extension(PGconn *conn)
|
|||||||
|
|
||||||
/* 4. Create extension */
|
/* 4. Create extension */
|
||||||
|
|
||||||
initPQExpBuffer(&query);
|
res = PQexec(schema_create_conn, "CREATE EXTENSION repmgr");
|
||||||
|
|
||||||
wrap_ddl_query(&query, config_file_options.replication_type,
|
|
||||||
"CREATE EXTENSION repmgr");
|
|
||||||
|
|
||||||
res = PQexec(schema_create_conn, query.data);
|
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
|
||||||
|
|
||||||
if ((PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK))
|
if ((PQresultStatus(res) != PGRES_COMMAND_OK && PQresultStatus(res) != PGRES_TUPLES_OK))
|
||||||
{
|
{
|
||||||
@@ -2763,13 +2770,13 @@ create_repmgr_extension(PGconn *conn)
|
|||||||
{
|
{
|
||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
wrap_ddl_query(&query, config_file_options.replication_type,
|
appendPQExpBuffer(&query,
|
||||||
"GRANT USAGE ON SCHEMA repmgr TO %s",
|
"GRANT USAGE ON SCHEMA repmgr TO %s",
|
||||||
userinfo.username);
|
userinfo.username);
|
||||||
|
|
||||||
res = PQexec(schema_create_conn, query.data);
|
res = PQexec(schema_create_conn, query.data);
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to grant usage on \"repmgr\" extension to %s:\n %s"),
|
log_error(_("unable to grant usage on \"repmgr\" extension to %s:\n %s"),
|
||||||
@@ -2784,12 +2791,12 @@ create_repmgr_extension(PGconn *conn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
initPQExpBuffer(&query);
|
initPQExpBuffer(&query);
|
||||||
wrap_ddl_query(&query, config_file_options.replication_type,
|
|
||||||
"GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s",
|
appendPQExpBuffer(&query,
|
||||||
userinfo.username);
|
"GRANT ALL ON ALL TABLES IN SCHEMA repmgr TO %s",
|
||||||
|
userinfo.username);
|
||||||
|
|
||||||
res = PQexec(schema_create_conn, query.data);
|
res = PQexec(schema_create_conn, query.data);
|
||||||
|
|
||||||
termPQExpBuffer(&query);
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
@@ -2844,15 +2851,25 @@ create_repmgr_extension(PGconn *conn)
|
|||||||
int
|
int
|
||||||
check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
|
check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *server_version_string)
|
||||||
{
|
{
|
||||||
int conn_server_version_num = get_server_version(conn, server_version_string);
|
char version_string[MAXVERSIONSTR] = "";
|
||||||
|
int conn_server_version_num = get_server_version(conn, version_string);
|
||||||
|
|
||||||
|
/* Copy the version string, if the caller wants it */
|
||||||
|
if (server_version_string != NULL)
|
||||||
|
strncpy(server_version_string, version_string, MAXVERSIONSTR);
|
||||||
|
|
||||||
if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
|
if (conn_server_version_num < MIN_SUPPORTED_VERSION_NUM)
|
||||||
{
|
{
|
||||||
if (conn_server_version_num > 0)
|
if (conn_server_version_num > 0)
|
||||||
|
{
|
||||||
log_error(_("%s requires %s to be PostgreSQL %s or later"),
|
log_error(_("%s requires %s to be PostgreSQL %s or later"),
|
||||||
progname(),
|
progname(),
|
||||||
server_type,
|
server_type,
|
||||||
MIN_SUPPORTED_VERSION);
|
MIN_SUPPORTED_VERSION);
|
||||||
|
log_detail(_("%s server version is %s"),
|
||||||
|
server_type,
|
||||||
|
version_string);
|
||||||
|
}
|
||||||
|
|
||||||
if (exit_on_error == true)
|
if (exit_on_error == true)
|
||||||
{
|
{
|
||||||
@@ -2863,6 +2880,38 @@ check_server_version(PGconn *conn, char *server_type, bool exit_on_error, char *
|
|||||||
return UNKNOWN_SERVER_VERSION_NUM;
|
return UNKNOWN_SERVER_VERSION_NUM;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If it's clear a particular repmgr feature branch won't be able to support
|
||||||
|
* PostgreSQL from a particular PostgreSQL release onwards (e.g. 4.4 with PostgreSQL
|
||||||
|
* 12 and later due to recovery.conf removal), set MAX_UNSUPPORTED_VERSION and
|
||||||
|
* MAX_UNSUPPORTED_VERSION_NUM in "repmgr.h" to define the first PostgreSQL
|
||||||
|
* version which can't be supported.
|
||||||
|
*/
|
||||||
|
#ifdef MAX_UNSUPPORTED_VERSION_NUM
|
||||||
|
if (conn_server_version_num >= MAX_UNSUPPORTED_VERSION_NUM)
|
||||||
|
{
|
||||||
|
if (conn_server_version_num > 0)
|
||||||
|
{
|
||||||
|
log_error(_("%s %s does not support PostgreSQL %s or later"),
|
||||||
|
progname(),
|
||||||
|
REPMGR_VERSION,
|
||||||
|
MAX_UNSUPPORTED_VERSION);
|
||||||
|
log_detail(_("%s server version is %s"),
|
||||||
|
server_type,
|
||||||
|
version_string);
|
||||||
|
log_hint(_("For details of supported versions see: https://repmgr.org/docs/current/install-requirements.html#INSTALL-COMPATIBILITY-MATRIX"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (exit_on_error == true)
|
||||||
|
{
|
||||||
|
PQfinish(conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
return UNKNOWN_SERVER_VERSION_NUM;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return conn_server_version_num;
|
return conn_server_version_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3602,45 +3651,401 @@ can_use_pg_rewind(PGconn *conn, const char *data_directory, PQExpBufferData *rea
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
// provided connection should be for the normal repmgr user
|
||||||
|
// upstream_node_record may be NULL or initialised to default values
|
||||||
|
bool
|
||||||
|
create_replication_slot(PGconn *conn, char *slot_name, t_node_info *upstream_node_record, PQExpBufferData *error_msg)
|
||||||
|
{
|
||||||
|
PGconn *slot_conn = NULL;
|
||||||
|
bool use_replication_protocol = false;
|
||||||
|
bool success = true;
|
||||||
|
char *replication_user = NULL;
|
||||||
|
|
||||||
|
_determine_replication_slot_user(conn, upstream_node_record, &replication_user);
|
||||||
|
/*
|
||||||
|
* If called in --dry-run context, if the replication slot user is not the
|
||||||
|
* repmgr user, attempt to validate the connection.
|
||||||
|
*/
|
||||||
|
if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
switch (ReplicationSlotUser)
|
||||||
|
{
|
||||||
|
case USER_TYPE_UNKNOWN:
|
||||||
|
log_error("unable to determine user for replication slot creation");
|
||||||
|
return false;
|
||||||
|
case REPMGR_USER:
|
||||||
|
log_info(_("replication slots will be created by user \"%s\""),
|
||||||
|
PQuser(conn));
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case REPLICATION_USER_NODE:
|
||||||
|
case REPLICATION_USER_OPT:
|
||||||
|
{
|
||||||
|
PGconn *repl_conn = duplicate_connection(conn,
|
||||||
|
replication_user,
|
||||||
|
true);
|
||||||
|
if (repl_conn == NULL || PQstatus(repl_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create replication connection as user \"%s\""),
|
||||||
|
replication_user);
|
||||||
|
log_detail("%s", PQerrorMessage(repl_conn));
|
||||||
|
|
||||||
|
PQfinish(repl_conn);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
log_info(_("replication slots will be created by replication user \"%s\""),
|
||||||
|
replication_user);
|
||||||
|
PQfinish(repl_conn);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case SUPERUSER:
|
||||||
|
{
|
||||||
|
PGconn *superuser_conn = duplicate_connection(conn,
|
||||||
|
runtime_options.superuser,
|
||||||
|
false);
|
||||||
|
if (superuser_conn == NULL || PQstatus(superuser_conn )!= CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create superuser connection as user \"%s\""),
|
||||||
|
runtime_options.superuser);
|
||||||
|
log_detail("%s", PQerrorMessage(superuser_conn));
|
||||||
|
|
||||||
|
PQfinish(superuser_conn);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info(_("replication slots will be created by superuser \"%s\""),
|
||||||
|
runtime_options.superuser);
|
||||||
|
PQfinish(superuser_conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If we can't create a replication slot with the connection provided to
|
||||||
|
* the function, create an connection with appropriate permissions.
|
||||||
|
*/
|
||||||
|
switch (ReplicationSlotUser)
|
||||||
|
{
|
||||||
|
case USER_TYPE_UNKNOWN:
|
||||||
|
log_error("unable to determine user for replication slot creation");
|
||||||
|
return false;
|
||||||
|
case REPMGR_USER:
|
||||||
|
slot_conn = conn;
|
||||||
|
log_info(_("creating replication slot as user \"%s\""),
|
||||||
|
PQuser(conn));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case REPLICATION_USER_NODE:
|
||||||
|
case REPLICATION_USER_OPT:
|
||||||
|
{
|
||||||
|
slot_conn = duplicate_connection(conn,
|
||||||
|
replication_user,
|
||||||
|
true);
|
||||||
|
if (slot_conn == NULL || PQstatus(slot_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create replication connection as user \"%s\""),
|
||||||
|
runtime_options.replication_user);
|
||||||
|
log_detail("%s", PQerrorMessage(slot_conn));
|
||||||
|
|
||||||
|
PQfinish(slot_conn);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
use_replication_protocol = true;
|
||||||
|
log_info(_("creating replication slot as replication user \"%s\""),
|
||||||
|
replication_user);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SUPERUSER:
|
||||||
|
{
|
||||||
|
slot_conn = duplicate_connection(conn,
|
||||||
|
runtime_options.superuser,
|
||||||
|
false);
|
||||||
|
if (slot_conn == NULL || PQstatus(slot_conn )!= CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to create super connection as user \"%s\""),
|
||||||
|
runtime_options.superuser);
|
||||||
|
log_detail("%s", PQerrorMessage(slot_conn));
|
||||||
|
|
||||||
|
PQfinish(slot_conn);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
log_info(_("creating replication slot as superuser \"%s\""),
|
||||||
|
runtime_options.superuser);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (use_replication_protocol == true)
|
||||||
|
{
|
||||||
|
success = create_replication_slot_replprot(conn, slot_conn, slot_name, error_msg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
success = create_replication_slot_sql(slot_conn, slot_name, error_msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (slot_conn != conn)
|
||||||
|
PQfinish(slot_conn);
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
||||||
{
|
{
|
||||||
|
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||||
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||||
RecordStatus record_status = get_slot_record(conn, slot_name, &slot_info);
|
RecordStatus record_status;
|
||||||
|
|
||||||
|
char *replication_user = NULL;
|
||||||
|
bool success = true;
|
||||||
|
|
||||||
|
if (node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
record_status = get_node_record(conn, node_id, &node_record);
|
||||||
|
}
|
||||||
|
|
||||||
|
_determine_replication_slot_user(conn, &node_record, &replication_user);
|
||||||
|
|
||||||
|
record_status = get_slot_record(conn, slot_name, &slot_info);
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
|
log_verbose(LOG_DEBUG, "attempting to delete slot \"%s\" on node %i",
|
||||||
slot_name, node_id);
|
slot_name, node_id);
|
||||||
|
|
||||||
if (record_status != RECORD_FOUND)
|
if (record_status != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
/* this is a good thing */
|
/* this is not a bad good thing */
|
||||||
log_verbose(LOG_INFO,
|
log_verbose(LOG_INFO,
|
||||||
_("slot \"%s\" does not exist on node %i, nothing to remove"),
|
_("slot \"%s\" does not exist on node %i, nothing to remove"),
|
||||||
slot_name, node_id);
|
slot_name, node_id);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if (slot_info.active == false)
|
|
||||||
{
|
|
||||||
if (drop_replication_slot(conn, slot_name) == true)
|
|
||||||
{
|
|
||||||
log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
if (slot_info.active == false)
|
||||||
* if active replication slot exists, call Houston as we have a
|
{
|
||||||
* problem
|
if (drop_replication_slot_sql(conn, slot_name) == true)
|
||||||
*/
|
{
|
||||||
|
log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
|
log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
|
||||||
|
success = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If an active replication slot exists, call Houston as we have a
|
||||||
|
* problem.
|
||||||
|
*/
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
_determine_replication_slot_user(PGconn *conn, t_node_info *upstream_node_record, char **replication_user)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If not previously done, work out which user will be responsible
|
||||||
|
* for creating replication slots.
|
||||||
|
*/
|
||||||
|
if (ReplicationSlotUser == USER_TYPE_UNKNOWN)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Is the repmgr user a superuser?
|
||||||
|
*/
|
||||||
|
if (is_superuser_connection(conn, NULL))
|
||||||
|
{
|
||||||
|
ReplicationSlotUser = REPMGR_USER;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Does the repmgr user have the REPLICATION role?
|
||||||
|
* Note we don't care here whether the repmgr user can actually
|
||||||
|
* make a replication connection, we're just confirming that the
|
||||||
|
* connection we have has the appropriate permissions.
|
||||||
|
*/
|
||||||
|
else if (is_replication_role(conn, NULL))
|
||||||
|
{
|
||||||
|
ReplicationSlotUser = REPMGR_USER;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Is a superuser provided with --superuser?
|
||||||
|
* We'll check later whether we can make a connection as that user.
|
||||||
|
*/
|
||||||
|
else if (runtime_options.superuser[0] != '\0')
|
||||||
|
{
|
||||||
|
ReplicationSlotUser = SUPERUSER;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Is a replication user provided with --replication-user?
|
||||||
|
* We'll check later whether we can make a replication connection as that user.
|
||||||
|
* Overrides any replication user defined in the upstream node record.
|
||||||
|
*/
|
||||||
|
else if (runtime_options.replication_user[0] != '\0')
|
||||||
|
{
|
||||||
|
ReplicationSlotUser = REPLICATION_USER_OPT;
|
||||||
|
*replication_user = runtime_options.replication_user;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Is the upstream's node record provided, and does it have a different
|
||||||
|
* replication user?
|
||||||
|
* We'll check later whether we can make a replication connection as that user.
|
||||||
|
*/
|
||||||
|
else if (upstream_node_record != NULL && upstream_node_record->node_id != UNKNOWN_NODE_ID
|
||||||
|
&& strncmp(upstream_node_record->repluser, PQuser(conn), NAMEDATALEN) != 0)
|
||||||
|
{
|
||||||
|
ReplicationSlotUser = REPLICATION_USER_NODE;
|
||||||
|
*replication_user = upstream_node_record->repluser;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
check_replication_slots_available(int node_id, PGconn* conn)
|
||||||
|
{
|
||||||
|
int max_replication_slots = UNKNOWN_VALUE;
|
||||||
|
int free_slots = get_free_replication_slot_count(conn, &max_replication_slots);
|
||||||
|
|
||||||
|
if (free_slots < 0)
|
||||||
|
{
|
||||||
|
log_error(_("unable to determine number of free replication slots on node %i"),
|
||||||
|
node_id);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (free_slots == 0)
|
||||||
|
{
|
||||||
|
log_error(_("no free replication slots available on node %i"),
|
||||||
|
node_id);
|
||||||
|
log_hint(_("consider increasing \"max_replication_slots\" (current value: %i)"),
|
||||||
|
max_replication_slots);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else if (runtime_options.dry_run == true)
|
||||||
|
{
|
||||||
|
log_info(_("replication slots in use, %i free slots on node %i"),
|
||||||
|
node_id,
|
||||||
|
free_slots);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether the specified standby has joined to its upstream.
|
||||||
|
*
|
||||||
|
* This is used by "standby switchover" and "node rejoin" to check
|
||||||
|
* the success of a node rejoin operation.
|
||||||
|
*
|
||||||
|
* IMPORTANT: the timeout settings will be taken from the node where the check
|
||||||
|
* is performed, which might not be the standby itself.
|
||||||
|
*/
|
||||||
|
standy_join_status
|
||||||
|
check_standby_join(PGconn *upstream_conn, t_node_info *upstream_node_record, t_node_info *standby_node_record)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
bool available = false;
|
||||||
|
|
||||||
|
for (i = 0; i < config_file_options.standby_reconnect_timeout; i++)
|
||||||
|
{
|
||||||
|
if (is_server_available(config_file_options.conninfo))
|
||||||
|
{
|
||||||
|
log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) is pingable"),
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id);
|
||||||
|
available = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i % 5 == 0)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_INFO, _("waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
i + 1,
|
||||||
|
config_file_options.node_rejoin_timeout);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_debug("sleeping 1 second waiting for node \"%s\" (ID: %i) to respond to pings; %i of max %i attempts",
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
i + 1,
|
||||||
|
config_file_options.node_rejoin_timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* node did not become available */
|
||||||
|
if (available == false)
|
||||||
|
{
|
||||||
|
return JOIN_FAIL_NO_PING;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; i < config_file_options.node_rejoin_timeout; i++)
|
||||||
|
{
|
||||||
|
char *node_state = NULL;
|
||||||
|
NodeAttached node_attached = is_downstream_node_attached(upstream_conn,
|
||||||
|
standby_node_record->node_name,
|
||||||
|
&node_state);
|
||||||
|
if (node_attached == NODE_ATTACHED)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_INFO, _("node \"%s\" (ID: %i) has attached to its upstream node"),
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id);
|
||||||
|
return JOIN_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i % 5 == 0)
|
||||||
|
{
|
||||||
|
log_info(_("waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts (parameter \"node_rejoin_timeout\")"),
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
i + 1,
|
||||||
|
config_file_options.node_rejoin_timeout);
|
||||||
|
|
||||||
|
if (node_attached == NODE_NOT_ATTACHED)
|
||||||
|
{
|
||||||
|
log_detail(_("node \"%s\" (ID: %i) is currrently attached to its upstream node in state \"%s\""),
|
||||||
|
upstream_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
node_state);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_detail(_("checking for record in node \"%s\"'s \"pg_stat_replication\" table where \"application_name\" is \"%s\""),
|
||||||
|
upstream_node_record->node_name,
|
||||||
|
standby_node_record->node_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_debug("sleeping 1 second waiting for node \"%s\" (ID: %i) to connect to new primary; %i of max %i attempts",
|
||||||
|
standby_node_record->node_name,
|
||||||
|
standby_node_record->node_id,
|
||||||
|
i + 1,
|
||||||
|
config_file_options.node_rejoin_timeout);
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return JOIN_FAIL_NO_REPLICATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -3654,30 +4059,15 @@ bool
|
|||||||
check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
|
check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin)
|
||||||
{
|
{
|
||||||
uint64 local_system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
uint64 local_system_identifier = UNKNOWN_SYSTEM_IDENTIFIER;
|
||||||
t_conninfo_param_list follow_target_repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
||||||
PGconn *follow_target_repl_conn = NULL;
|
PGconn *follow_target_repl_conn = NULL;
|
||||||
t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
||||||
TimeLineHistoryEntry *follow_target_history = NULL;
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
const char *action = is_rejoin == true ? "rejoin" : "follow";
|
const char *action = is_rejoin == true ? "rejoin" : "follow";
|
||||||
|
|
||||||
/* check replication connection */
|
/* check replication connection */
|
||||||
initialize_conninfo_params(&follow_target_repl_conninfo, false);
|
follow_target_repl_conn = establish_replication_connection_from_conn(follow_target_conn,
|
||||||
|
follow_target_node_record->repluser);
|
||||||
conn_to_param_list(follow_target_conn, &follow_target_repl_conninfo);
|
|
||||||
|
|
||||||
if (strcmp(param_get(&follow_target_repl_conninfo, "user"), follow_target_node_record->repluser) != 0)
|
|
||||||
{
|
|
||||||
param_set(&follow_target_repl_conninfo, "user", follow_target_node_record->repluser);
|
|
||||||
param_set(&follow_target_repl_conninfo, "dbname", "replication");
|
|
||||||
}
|
|
||||||
|
|
||||||
param_set(&follow_target_repl_conninfo, "replication", "1");
|
|
||||||
|
|
||||||
follow_target_repl_conn = establish_db_connection_by_params(&follow_target_repl_conninfo, false);
|
|
||||||
|
|
||||||
free_conninfo_params(&follow_target_repl_conninfo);
|
|
||||||
|
|
||||||
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -3775,7 +4165,8 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo
|
|||||||
/*
|
/*
|
||||||
* upstream has higher timeline - check where it forked off from this node's timeline
|
* upstream has higher timeline - check where it forked off from this node's timeline
|
||||||
*/
|
*/
|
||||||
follow_target_history = get_timeline_history(follow_target_repl_conn, local_tli + 1);
|
TimeLineHistoryEntry *follow_target_history = get_timeline_history(follow_target_repl_conn,
|
||||||
|
local_tli + 1);
|
||||||
|
|
||||||
if (follow_target_history == NULL)
|
if (follow_target_history == NULL)
|
||||||
{
|
{
|
||||||
@@ -3839,17 +4230,101 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo
|
|||||||
format_lsn(follow_target_history->end));
|
format_lsn(follow_target_history->end));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pfree(follow_target_history);
|
||||||
}
|
}
|
||||||
|
|
||||||
PQfinish(follow_target_repl_conn);
|
PQfinish(follow_target_repl_conn);
|
||||||
|
|
||||||
if (follow_target_history)
|
|
||||||
pfree(follow_target_history);
|
|
||||||
|
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that the replication configuration file is owned by the user who
|
||||||
|
* owns the data directory.
|
||||||
|
*/
|
||||||
|
extern bool
|
||||||
|
check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg)
|
||||||
|
{
|
||||||
|
PQExpBufferData replication_config_file;
|
||||||
|
struct stat dirstat;
|
||||||
|
struct stat confstat;
|
||||||
|
|
||||||
|
if (stat(data_directory, &dirstat))
|
||||||
|
{
|
||||||
|
if (error_msg != NULL)
|
||||||
|
{
|
||||||
|
appendPQExpBuffer(error_msg,
|
||||||
|
"unable to check ownership of data directory \"%s\"",
|
||||||
|
data_directory);
|
||||||
|
appendPQExpBufferStr(detail_msg,
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
initPQExpBuffer(&replication_config_file);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&replication_config_file,
|
||||||
|
"%s/%s",
|
||||||
|
config_file_options.data_directory,
|
||||||
|
pg_version >= 120000 ? PG_AUTOCONF_FILENAME : RECOVERY_COMMAND_FILE);
|
||||||
|
|
||||||
|
stat(replication_config_file.data, &confstat);
|
||||||
|
|
||||||
|
if (confstat.st_uid == dirstat.st_uid)
|
||||||
|
{
|
||||||
|
termPQExpBuffer(&replication_config_file);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (error_msg != NULL)
|
||||||
|
{
|
||||||
|
char conf_owner[MAXLEN];
|
||||||
|
char dir_owner[MAXLEN];
|
||||||
|
struct passwd *pw;
|
||||||
|
|
||||||
|
pw = getpwuid(confstat.st_uid);
|
||||||
|
if (!pw)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(conf_owner,
|
||||||
|
"(unknown user %i)",
|
||||||
|
confstat.st_uid);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strncpy(conf_owner, pw->pw_name, MAXLEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
pw = getpwuid(dirstat.st_uid);
|
||||||
|
|
||||||
|
if (!pw)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(conf_owner,
|
||||||
|
"(unknown user %i)",
|
||||||
|
dirstat.st_uid);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strncpy(dir_owner, pw->pw_name, MAXLEN);
|
||||||
|
}
|
||||||
|
|
||||||
|
appendPQExpBuffer(error_msg,
|
||||||
|
"ownership error for file \"%s\"",
|
||||||
|
replication_config_file.data);
|
||||||
|
appendPQExpBuffer(detail_msg,
|
||||||
|
"file owner is \"%s\", data directory owner is \"%s\"",
|
||||||
|
conf_owner,
|
||||||
|
dir_owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&replication_config_file);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Simple check to see if "shared_preload_libraries" includes "repmgr".
|
* Simple check to see if "shared_preload_libraries" includes "repmgr".
|
||||||
* Parsing "shared_preload_libraries" is non-trivial, as it's potentially
|
* Parsing "shared_preload_libraries" is non-trivial, as it's potentially
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr-client.h
|
* repmgr-client.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -34,23 +34,21 @@
|
|||||||
#define STANDBY_SWITCHOVER 8
|
#define STANDBY_SWITCHOVER 8
|
||||||
#define WITNESS_REGISTER 9
|
#define WITNESS_REGISTER 9
|
||||||
#define WITNESS_UNREGISTER 10
|
#define WITNESS_UNREGISTER 10
|
||||||
#define BDR_REGISTER 11
|
#define NODE_STATUS 11
|
||||||
#define BDR_UNREGISTER 12
|
#define NODE_CHECK 12
|
||||||
#define NODE_STATUS 13
|
#define NODE_SERVICE 13
|
||||||
#define NODE_CHECK 14
|
#define NODE_REJOIN 14
|
||||||
#define NODE_SERVICE 15
|
#define NODE_CONTROL 15
|
||||||
#define NODE_REJOIN 16
|
#define CLUSTER_SHOW 16
|
||||||
#define NODE_CONTROL 17
|
#define CLUSTER_CLEANUP 17
|
||||||
#define CLUSTER_SHOW 18
|
#define CLUSTER_MATRIX 18
|
||||||
#define CLUSTER_CLEANUP 19
|
#define CLUSTER_CROSSCHECK 19
|
||||||
#define CLUSTER_MATRIX 20
|
#define CLUSTER_EVENT 20
|
||||||
#define CLUSTER_CROSSCHECK 21
|
#define SERVICE_STATUS 21
|
||||||
#define CLUSTER_EVENT 22
|
#define SERVICE_PAUSE 22
|
||||||
#define SERVICE_STATUS 23
|
#define SERVICE_UNPAUSE 23
|
||||||
#define SERVICE_PAUSE 24
|
#define DAEMON_START 24
|
||||||
#define SERVICE_UNPAUSE 25
|
#define DAEMON_STOP 25
|
||||||
#define DAEMON_START 26
|
|
||||||
#define DAEMON_STOP 27
|
|
||||||
|
|
||||||
/* command line options without short versions */
|
/* command line options without short versions */
|
||||||
#define OPT_HELP 1001
|
#define OPT_HELP 1001
|
||||||
@@ -83,23 +81,25 @@
|
|||||||
#define OPT_SIBLINGS_FOLLOW 1028
|
#define OPT_SIBLINGS_FOLLOW 1028
|
||||||
#define OPT_ROLE 1029
|
#define OPT_ROLE 1029
|
||||||
#define OPT_DOWNSTREAM 1030
|
#define OPT_DOWNSTREAM 1030
|
||||||
#define OPT_SLOTS 1031
|
#define OPT_UPSTREAM 1031
|
||||||
#define OPT_CONFIG_ARCHIVE_DIR 1032
|
#define OPT_SLOTS 1032
|
||||||
#define OPT_HAS_PASSFILE 1033
|
#define OPT_CONFIG_ARCHIVE_DIR 1033
|
||||||
#define OPT_WAIT_START 1034
|
#define OPT_HAS_PASSFILE 1034
|
||||||
#define OPT_REPL_CONN 1035
|
#define OPT_WAIT_START 1035
|
||||||
#define OPT_REMOTE_NODE_ID 1036
|
#define OPT_REPL_CONN 1036
|
||||||
#define OPT_RECOVERY_CONF_ONLY 1037
|
#define OPT_REMOTE_NODE_ID 1037
|
||||||
#define OPT_NO_WAIT 1038
|
#define OPT_REPLICATION_CONF_ONLY 1038
|
||||||
#define OPT_MISSING_SLOTS 1039
|
#define OPT_NO_WAIT 1039
|
||||||
#define OPT_REPMGRD_NO_PAUSE 1040
|
#define OPT_MISSING_SLOTS 1040
|
||||||
#define OPT_VERSION_NUMBER 1041
|
#define OPT_REPMGRD_NO_PAUSE 1041
|
||||||
#define OPT_DATA_DIRECTORY_CONFIG 1042
|
#define OPT_VERSION_NUMBER 1042
|
||||||
#define OPT_COMPACT 1043
|
#define OPT_DATA_DIRECTORY_CONFIG 1043
|
||||||
#define OPT_DISABLE_WAL_RECEIVER 1044
|
#define OPT_COMPACT 1044
|
||||||
#define OPT_ENABLE_WAL_RECEIVER 1045
|
#define OPT_DISABLE_WAL_RECEIVER 1045
|
||||||
#define OPT_DETAIL 1046
|
#define OPT_ENABLE_WAL_RECEIVER 1046
|
||||||
#define OPT_REPMGRD_FORCE_UNPAUSE 1047
|
#define OPT_DETAIL 1047
|
||||||
|
#define OPT_REPMGRD_FORCE_UNPAUSE 1048
|
||||||
|
#define OPT_REPLICATION_CONFIG_OWNER 1049
|
||||||
|
|
||||||
/* deprecated since 4.0 */
|
/* deprecated since 4.0 */
|
||||||
#define OPT_CHECK_UPSTREAM_CONFIG 999
|
#define OPT_CHECK_UPSTREAM_CONFIG 999
|
||||||
@@ -157,7 +157,9 @@ static struct option long_options[] =
|
|||||||
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
|
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
|
||||||
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
|
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
|
||||||
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
|
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
|
||||||
{"recovery-conf-only", no_argument, NULL, OPT_RECOVERY_CONF_ONLY},
|
{"replication-conf-only", no_argument, NULL, OPT_REPLICATION_CONF_ONLY},
|
||||||
|
/* deprecate this once Pg11 and earlier are unsupported */
|
||||||
|
{"recovery-conf-only", no_argument, NULL, OPT_REPLICATION_CONF_ONLY},
|
||||||
|
|
||||||
/* "standby register" options */
|
/* "standby register" options */
|
||||||
{"wait-start", required_argument, NULL, OPT_WAIT_START},
|
{"wait-start", required_argument, NULL, OPT_WAIT_START},
|
||||||
@@ -178,6 +180,7 @@ static struct option long_options[] =
|
|||||||
/* "node check" options */
|
/* "node check" options */
|
||||||
{"archive-ready", no_argument, NULL, OPT_ARCHIVE_READY},
|
{"archive-ready", no_argument, NULL, OPT_ARCHIVE_READY},
|
||||||
{"downstream", no_argument, NULL, OPT_DOWNSTREAM},
|
{"downstream", no_argument, NULL, OPT_DOWNSTREAM},
|
||||||
|
{"upstream", no_argument, NULL, OPT_UPSTREAM},
|
||||||
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
|
{"replication-lag", no_argument, NULL, OPT_REPLICATION_LAG},
|
||||||
{"role", no_argument, NULL, OPT_ROLE},
|
{"role", no_argument, NULL, OPT_ROLE},
|
||||||
{"slots", no_argument, NULL, OPT_SLOTS},
|
{"slots", no_argument, NULL, OPT_SLOTS},
|
||||||
@@ -185,6 +188,7 @@ static struct option long_options[] =
|
|||||||
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
|
||||||
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
|
||||||
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
|
||||||
|
{"replication-config-owner", no_argument, NULL, OPT_REPLICATION_CONFIG_OWNER},
|
||||||
|
|
||||||
/* "node rejoin" options */
|
/* "node rejoin" options */
|
||||||
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},
|
||||||
|
|||||||
68
repmgr.c
68
repmgr.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.c - repmgr extension
|
* repmgr.c - repmgr extension
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This is the actual extension code; see repmgr-client.c for the code which
|
* This is the actual extension code; see repmgr-client.c for the code which
|
||||||
* generates the repmgr binary
|
* generates the repmgr binary
|
||||||
@@ -84,8 +84,6 @@ typedef struct repmgrdSharedState
|
|||||||
int current_electoral_term;
|
int current_electoral_term;
|
||||||
int candidate_node_id;
|
int candidate_node_id;
|
||||||
bool follow_new_primary;
|
bool follow_new_primary;
|
||||||
/* BDR failover */
|
|
||||||
int bdr_failover_handler;
|
|
||||||
} repmgrdSharedState;
|
} repmgrdSharedState;
|
||||||
|
|
||||||
static repmgrdSharedState *shared_state = NULL;
|
static repmgrdSharedState *shared_state = NULL;
|
||||||
@@ -131,12 +129,6 @@ PG_FUNCTION_INFO_V1(get_new_primary);
|
|||||||
Datum reset_voting_status(PG_FUNCTION_ARGS);
|
Datum reset_voting_status(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(reset_voting_status);
|
PG_FUNCTION_INFO_V1(reset_voting_status);
|
||||||
|
|
||||||
Datum am_bdr_failover_handler(PG_FUNCTION_ARGS);
|
|
||||||
PG_FUNCTION_INFO_V1(am_bdr_failover_handler);
|
|
||||||
|
|
||||||
Datum unset_bdr_failover_handler(PG_FUNCTION_ARGS);
|
|
||||||
PG_FUNCTION_INFO_V1(unset_bdr_failover_handler);
|
|
||||||
|
|
||||||
Datum set_repmgrd_pid(PG_FUNCTION_ARGS);
|
Datum set_repmgrd_pid(PG_FUNCTION_ARGS);
|
||||||
PG_FUNCTION_INFO_V1(set_repmgrd_pid);
|
PG_FUNCTION_INFO_V1(set_repmgrd_pid);
|
||||||
|
|
||||||
@@ -241,7 +233,6 @@ repmgr_shmem_startup(void)
|
|||||||
shared_state->voting_status = VS_NO_VOTE;
|
shared_state->voting_status = VS_NO_VOTE;
|
||||||
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
shared_state->candidate_node_id = UNKNOWN_NODE_ID;
|
||||||
shared_state->follow_new_primary = false;
|
shared_state->follow_new_primary = false;
|
||||||
shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
LWLockRelease(AddinShmemInitLock);
|
LWLockRelease(AddinShmemInitLock);
|
||||||
@@ -571,63 +562,6 @@ reset_voting_status(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
Datum
|
|
||||||
am_bdr_failover_handler(PG_FUNCTION_ARGS)
|
|
||||||
{
|
|
||||||
int node_id = UNKNOWN_NODE_ID;
|
|
||||||
bool am_handler = false;
|
|
||||||
|
|
||||||
if (!shared_state)
|
|
||||||
PG_RETURN_NULL();
|
|
||||||
|
|
||||||
if (PG_ARGISNULL(0))
|
|
||||||
PG_RETURN_NULL();
|
|
||||||
|
|
||||||
node_id = PG_GETARG_INT32(0);
|
|
||||||
|
|
||||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
|
||||||
|
|
||||||
if (shared_state->bdr_failover_handler == UNKNOWN_NODE_ID)
|
|
||||||
{
|
|
||||||
LWLockRelease(shared_state->lock);
|
|
||||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
|
||||||
shared_state->bdr_failover_handler = node_id;
|
|
||||||
am_handler = true;
|
|
||||||
}
|
|
||||||
else if (shared_state->bdr_failover_handler == node_id)
|
|
||||||
{
|
|
||||||
am_handler = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
LWLockRelease(shared_state->lock);
|
|
||||||
|
|
||||||
PG_RETURN_BOOL(am_handler);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
Datum
|
|
||||||
unset_bdr_failover_handler(PG_FUNCTION_ARGS)
|
|
||||||
{
|
|
||||||
if (!shared_state)
|
|
||||||
PG_RETURN_NULL();
|
|
||||||
|
|
||||||
LWLockAcquire(shared_state->lock, LW_SHARED);
|
|
||||||
|
|
||||||
/* only do something if local_node_id is initialised */
|
|
||||||
if (shared_state->local_node_id != UNKNOWN_NODE_ID)
|
|
||||||
{
|
|
||||||
LWLockRelease(shared_state->lock);
|
|
||||||
LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
|
|
||||||
|
|
||||||
shared_state->bdr_failover_handler = UNKNOWN_NODE_ID;
|
|
||||||
}
|
|
||||||
|
|
||||||
LWLockRelease(shared_state->lock);
|
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the repmgrd pid; or NULL if none set; or -1 if set but repmgrd
|
* Returns the repmgrd pid; or NULL if none set; or -1 if set but repmgrd
|
||||||
* process not running (TODO!)
|
* process not running (TODO!)
|
||||||
|
|||||||
@@ -21,7 +21,7 @@
|
|||||||
# repmgr and repmgrd require the following items to be explicitly configured.
|
# repmgr and repmgrd require the following items to be explicitly configured.
|
||||||
|
|
||||||
|
|
||||||
#node_id= # A unique integer greater than zero
|
#node_id= # A unique integer greater than zero
|
||||||
#node_name='' # An arbitrary (but unique) string; we recommend
|
#node_name='' # An arbitrary (but unique) string; we recommend
|
||||||
# using the server's hostname or another identifier
|
# using the server's hostname or another identifier
|
||||||
# unambiguously associated with the server to avoid
|
# unambiguously associated with the server to avoid
|
||||||
@@ -29,8 +29,8 @@
|
|||||||
# node's current role, e.g. 'primary' or 'standby1',
|
# node's current role, e.g. 'primary' or 'standby1',
|
||||||
# as roles can change and it will be confusing if
|
# as roles can change and it will be confusing if
|
||||||
# the current primary is called 'standby1'.
|
# the current primary is called 'standby1'.
|
||||||
# The string's maximum length is 63 characters and it should
|
# The string's maximum length is 63 characters and it should
|
||||||
# contain only printable ASCII characters.
|
# contain only printable ASCII characters.
|
||||||
|
|
||||||
#conninfo='' # Database connection information as a conninfo string.
|
#conninfo='' # Database connection information as a conninfo string.
|
||||||
# All servers in the cluster must be able to connect to
|
# All servers in the cluster must be able to connect to
|
||||||
@@ -71,13 +71,12 @@
|
|||||||
#replication_user='repmgr' # User to make replication connections with, if not set
|
#replication_user='repmgr' # User to make replication connections with, if not set
|
||||||
# defaults to the user defined in "conninfo".
|
# defaults to the user defined in "conninfo".
|
||||||
|
|
||||||
#replication_type='physical' # Must be one of "physical" or "bdr".
|
#replication_type='physical' # Must be "physical" (the default).
|
||||||
# NOTE: "bdr" can only be used with BDR 2.x
|
|
||||||
|
|
||||||
#location='default' # An arbitrary string defining the location of the node; this
|
#location='default' # An arbitrary string defining the location of the node; this
|
||||||
# is used during failover to check visibility of the
|
# is used during failover to check visibility of the
|
||||||
# current primary node. For further details see:
|
# current primary node. For further details see:
|
||||||
# https://repmgr.org/docs/current/repmgrd-network-split.html
|
# https://repmgr.org/docs/current/repmgrd-network-split.html
|
||||||
|
|
||||||
#use_replication_slots=no # whether to use physical replication slots
|
#use_replication_slots=no # whether to use physical replication slots
|
||||||
# NOTE: when using replication slots,
|
# NOTE: when using replication slots,
|
||||||
@@ -160,12 +159,13 @@
|
|||||||
|
|
||||||
#repmgr_bindir='' # Path to repmgr binary directory (location of the repmgr
|
#repmgr_bindir='' # Path to repmgr binary directory (location of the repmgr
|
||||||
# binary. Only needed if the repmgr executable is not in
|
# binary. Only needed if the repmgr executable is not in
|
||||||
# the system $PATH or the path defined in "pg_bindir".
|
# the system $PATH or the path defined in "pg_bindir".
|
||||||
|
|
||||||
#use_primary_conninfo_password=false # explicitly set "password" in recovery.conf's
|
#use_primary_conninfo_password=false # explicitly set "password" in "primary_conninfo"
|
||||||
# "primary_conninfo" parameter using the value contained
|
# using the value contained in the environment variable
|
||||||
# in the environment variable PGPASSWORD
|
# PGPASSWORD
|
||||||
#passfile='' # path to .pgpass file to include in "primary_conninfo"
|
#passfile='' # path to .pgpass file to include in "primary_conninfo"
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
# external command options
|
# external command options
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
@@ -179,7 +179,7 @@
|
|||||||
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
# rsync_options=--archive --checksum --compress --progress --rsh="ssh -o \"StrictHostKeyChecking no\""
|
||||||
# ssh_options=-o "StrictHostKeyChecking no"
|
# ssh_options=-o "StrictHostKeyChecking no"
|
||||||
|
|
||||||
#pg_ctl_options='' # Options to append to "pg_ctl"
|
#pg_ctl_options='' # Options to append to "pg_ctl"
|
||||||
#pg_basebackup_options='' # Options to append to "pg_basebackup"
|
#pg_basebackup_options='' # Options to append to "pg_basebackup"
|
||||||
#rsync_options='' # Options to append to "rsync"
|
#rsync_options='' # Options to append to "rsync"
|
||||||
ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
||||||
@@ -194,22 +194,22 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
#
|
#
|
||||||
# Examples:
|
# Examples:
|
||||||
#
|
#
|
||||||
# tablespace_mapping=/path/to/original/tablespace=/path/to/new/tablespace
|
# tablespace_mapping='/path/to/original/tablespace=/path/to/new/tablespace'
|
||||||
# restore_command = 'cp /path/to/archived/wals/%f %p'
|
# restore_command = 'cp /path/to/archived/wals/%f %p'
|
||||||
|
|
||||||
#tablespace_mapping='' # Tablespaces can be remapped from one
|
#tablespace_mapping='' # Tablespaces can be remapped from one
|
||||||
# file system location to another. This
|
# file system location to another. This
|
||||||
# parameter can be provided multiple times.
|
# parameter can be provided multiple times.
|
||||||
|
|
||||||
#restore_command='' # This will be placed in the recovery.conf file generated
|
#restore_command='' # This will be included in the recovery configuration
|
||||||
# by repmgr.
|
# generated by repmgr.
|
||||||
|
|
||||||
#archive_cleanup_command='' # This will be placed in the recovery.conf file generated
|
#archive_cleanup_command='' # This will be included in the recovery configuration
|
||||||
# by repmgr. Note we recommend using Barman for managing
|
# generated by repmgr. Note we recommend using Barman for
|
||||||
# WAL archives (see: https://www.pgbarman.org )
|
# managing WAL archives (see: https://www.pgbarman.org )
|
||||||
|
|
||||||
#recovery_min_apply_delay= # If provided, "recovery_min_apply_delay" in recovery.conf
|
#recovery_min_apply_delay= # If provided, "recovery_min_apply_delay" will be set to
|
||||||
# will be set to this value (PostgreSQL 9.4 and later).
|
# this value (PostgreSQL 9.4 and later).
|
||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
#------------------------------------------------------------------------------
|
||||||
@@ -289,24 +289,24 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# node or follow the new upstream node
|
# node or follow the new upstream node
|
||||||
# 'manual': repmgrd will take no action and the node will require
|
# 'manual': repmgrd will take no action and the node will require
|
||||||
# manual attention to reattach it to replication
|
# manual attention to reattach it to replication
|
||||||
# (does not apply to BDR mode)
|
|
||||||
|
|
||||||
#priority=100 # indicates a preferred priority for promoting nodes;
|
#priority=100 # indicates a preferred priority for promoting nodes;
|
||||||
# a value of zero prevents the node being promoted to primary
|
# a value of zero prevents the node being promoted to primary
|
||||||
# (default: 100)
|
# (default: 100)
|
||||||
|
|
||||||
#connection_check_type=ping # How to check availability of the upstream node; valid options:
|
#connection_check_type=ping # How to check availability of the upstream node; valid options:
|
||||||
# 'ping': use PQping() to check if the node is accepting connections
|
# 'ping': use PQping() to check if the node is accepting connections
|
||||||
# 'connection': execute a throwaway query on the current connection
|
# 'connection': execute a throwaway query on the current connection
|
||||||
|
# 'query': execute an SQL statement on the node via the existing connection
|
||||||
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
#reconnect_attempts=6 # Number of attempts which will be made to reconnect to an unreachable
|
||||||
# primary (or other upstream node)
|
# primary (or other upstream node)
|
||||||
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
#reconnect_interval=10 # Interval between attempts to reconnect to an unreachable
|
||||||
# primary (or other upstream node)
|
# primary (or other upstream node)
|
||||||
#promote_command= # command repmgrd executes when promoting a new primary; use something like:
|
#promote_command='' # command repmgrd executes when promoting a new primary; use something like:
|
||||||
#
|
#
|
||||||
# repmgr standby promote -f /etc/repmgr.conf
|
# repmgr standby promote -f /etc/repmgr.conf
|
||||||
#
|
#
|
||||||
#follow_command= # command repmgrd executes when instructing a standby to follow a new primary;
|
#follow_command='' # command repmgrd executes when instructing a standby to follow a new primary;
|
||||||
# use something like:
|
# use something like:
|
||||||
#
|
#
|
||||||
# repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
|
# repmgr standby follow -f /etc/repmgr.conf -W --upstream-node-id=%n
|
||||||
@@ -318,8 +318,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# for the local node to restart and become ready to accept connections after
|
# for the local node to restart and become ready to accept connections after
|
||||||
# executing "follow_command" (defaults to the value set in "standby_reconnect_timeout")
|
# executing "follow_command" (defaults to the value set in "standby_reconnect_timeout")
|
||||||
|
|
||||||
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
#monitoring_history=no # Whether to write monitoring data to the "monitoring_history" table
|
||||||
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
#monitor_interval_secs=2 # Interval (in seconds) at which to write monitoring data
|
||||||
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
#degraded_monitoring_timeout=-1 # Interval (in seconds) after which repmgrd will terminate if the
|
||||||
# server(s) being monitored are no longer available. -1 (default)
|
# server(s) being monitored are no longer available. -1 (default)
|
||||||
# disables the timeout completely.
|
# disables the timeout completely.
|
||||||
@@ -348,7 +348,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# an error, pause the specified amount of seconds before rerunning the election.
|
# an error, pause the specified amount of seconds before rerunning the election.
|
||||||
#
|
#
|
||||||
# The following items are relevant for repmgrd running on the primary,
|
# The following items are relevant for repmgrd running on the primary,
|
||||||
# and will be ignored on non-primary nodes
|
# and will be ignored on non-primary nodes
|
||||||
#child_nodes_check_interval=5 # Interval (in seconds) to check for attached child nodes (standbys)
|
#child_nodes_check_interval=5 # Interval (in seconds) to check for attached child nodes (standbys)
|
||||||
#child_nodes_connected_min_count=-1 # Minimum number of child nodes which must remain connected, otherwise
|
#child_nodes_connected_min_count=-1 # Minimum number of child nodes which must remain connected, otherwise
|
||||||
# disconnection command will be triggered
|
# disconnection command will be triggered
|
||||||
@@ -373,7 +373,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
# NOTE: These commands must be runnable on remote nodes as well for switchover
|
||||||
# to function correctly.
|
# to function correctly.
|
||||||
#
|
#
|
||||||
# If you use sudo, the user repmgr runs as (usually 'postgres') must have
|
# If you use sudo, the user repmgr runs as (usually 'postgres') must have
|
||||||
# passwordless sudo access to execute the command.
|
# passwordless sudo access to execute the command.
|
||||||
#
|
#
|
||||||
# For example, to use systemd, you can set
|
# For example, to use systemd, you can set
|
||||||
@@ -386,8 +386,8 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# # this is required when running sudo over ssh without -t:
|
# # this is required when running sudo over ssh without -t:
|
||||||
# Defaults:postgres !requiretty
|
# Defaults:postgres !requiretty
|
||||||
# postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
|
# postgres ALL = NOPASSWD: /usr/bin/systemctl stop postgresql-9.6, \
|
||||||
# /usr/bin/systemctl start postgresql-9.6, \
|
# /usr/bin/systemctl start postgresql-9.6, \
|
||||||
# /usr/bin/systemctl restart postgresql-9.6
|
# /usr/bin/systemctl restart postgresql-9.6
|
||||||
#
|
#
|
||||||
# Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
|
# Debian/Ubuntu users: use "sudo pg_ctlcluster" to execute service control commands.
|
||||||
#
|
#
|
||||||
@@ -414,7 +414,7 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
|
|
||||||
# Various warning/critical thresholds used by "repmgr node check".
|
# Various warning/critical thresholds used by "repmgr node check".
|
||||||
|
|
||||||
#archive_ready_warning=16 # repmgr node check --archive-ready
|
#archive_ready_warning=16 # repmgr node check --archive-ready
|
||||||
#archive_ready_critical=128 #
|
#archive_ready_critical=128 #
|
||||||
# Numbers of files pending archiving via PostgreSQL's
|
# Numbers of files pending archiving via PostgreSQL's
|
||||||
# "archive_command" configuration parameter. If
|
# "archive_command" configuration parameter. If
|
||||||
@@ -435,12 +435,3 @@ ssh_options='-q -o ConnectTimeout=10' # Options to append to "ssh"
|
|||||||
# issues with shutting down the demotion candidate.
|
# issues with shutting down the demotion candidate.
|
||||||
|
|
||||||
|
|
||||||
#------------------------------------------------------------------------------
|
|
||||||
# BDR monitoring options
|
|
||||||
#------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
#bdr_local_monitoring_only=false # Only monitor the local node; no checks will be
|
|
||||||
# performed on the other node
|
|
||||||
#bdr_recovery_timeout # If a BDR node was offline and has become available
|
|
||||||
# maximum length of time in seconds to wait for the
|
|
||||||
# node to reconnect to the cluster
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# repmgr extension
|
# repmgr extension
|
||||||
comment = 'Replication manager for PostgreSQL'
|
comment = 'Replication manager for PostgreSQL'
|
||||||
default_version = '5.0'
|
default_version = '5.1'
|
||||||
module_pathname = '$libdir/repmgr'
|
module_pathname = '$libdir/repmgr'
|
||||||
relocatable = false
|
relocatable = false
|
||||||
schema = repmgr
|
schema = repmgr
|
||||||
|
|||||||
8
repmgr.h
8
repmgr.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgr.h
|
* repmgr.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -78,16 +78,15 @@
|
|||||||
#define MIN_SUPPORTED_VERSION_NUM 90300
|
#define MIN_SUPPORTED_VERSION_NUM 90300
|
||||||
|
|
||||||
#define REPLICATION_TYPE_PHYSICAL 1
|
#define REPLICATION_TYPE_PHYSICAL 1
|
||||||
#define REPLICATION_TYPE_BDR 2
|
|
||||||
|
|
||||||
#define UNKNOWN_SERVER_VERSION_NUM -1
|
#define UNKNOWN_SERVER_VERSION_NUM -1
|
||||||
#define UNKNOWN_BDR_VERSION_NUM -1
|
|
||||||
#define UNKNOWN_REPMGR_VERSION_NUM -1
|
#define UNKNOWN_REPMGR_VERSION_NUM -1
|
||||||
|
|
||||||
#define UNKNOWN_TIMELINE_ID -1
|
#define UNKNOWN_TIMELINE_ID -1
|
||||||
#define UNKNOWN_SYSTEM_IDENTIFIER 0
|
#define UNKNOWN_SYSTEM_IDENTIFIER 0
|
||||||
#define UNKNOWN_PID -1
|
#define UNKNOWN_PID -1
|
||||||
#define UNKNOWN_REPLICATION_LAG -1
|
#define UNKNOWN_REPLICATION_LAG -1
|
||||||
|
#define UNKNOWN_VALUE -1
|
||||||
|
|
||||||
#define NODE_NOT_FOUND -1
|
#define NODE_NOT_FOUND -1
|
||||||
#define NO_UPSTREAM_NODE -1
|
#define NO_UPSTREAM_NODE -1
|
||||||
@@ -98,8 +97,6 @@
|
|||||||
#define ARCHIVE_STATUS_DIR_ERROR -1
|
#define ARCHIVE_STATUS_DIR_ERROR -1
|
||||||
#define NO_DEGRADED_MONITORING_ELAPSED -1
|
#define NO_DEGRADED_MONITORING_ELAPSED -1
|
||||||
|
|
||||||
#define BDR2_REPLICATION_SET_NAME "repmgr"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* various default values - ensure repmgr.conf.sample is updated
|
* various default values - ensure repmgr.conf.sample is updated
|
||||||
* if any of these are changed
|
* if any of these are changed
|
||||||
@@ -113,7 +110,6 @@
|
|||||||
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
#define DEFAULT_PRIMARY_NOTIFICATION_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
#define DEFAULT_PRIMARY_FOLLOW_TIMEOUT 60 /* seconds */
|
||||||
#define DEFAULT_STANDBY_FOLLOW_TIMEOUT 30 /* seconds */
|
#define DEFAULT_STANDBY_FOLLOW_TIMEOUT 30 /* seconds */
|
||||||
#define DEFAULT_BDR_RECOVERY_TIMEOUT 30 /* seconds */
|
|
||||||
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
#define DEFAULT_ARCHIVE_READY_WARNING 16 /* WAL files */
|
||||||
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
#define DEFAULT_ARCHIVE_READY_CRITICAL 128 /* WAL files */
|
||||||
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
|
#define DEFAULT_REPLICATION_LAG_WARNING 300 /* seconds */
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
#define REPMGR_VERSION_DATE ""
|
#define REPMGR_VERSION_DATE ""
|
||||||
#define REPMGR_VERSION "5.0"
|
#define REPMGR_VERSION "5.1.0"
|
||||||
#define REPMGR_VERSION_NUM 50000
|
#define REPMGR_VERSION_NUM 50100
|
||||||
#define REPMGR_RELEASE_DATE "2019-XX-XX"
|
#define REPMGR_RELEASE_DATE "2020-04-13"
|
||||||
#define PG_ACTUAL_VERSION_NUM
|
#define PG_ACTUAL_VERSION_NUM
|
||||||
|
|||||||
678
repmgrd-bdr.c
678
repmgrd-bdr.c
@@ -1,678 +0,0 @@
|
|||||||
/*
|
|
||||||
* repmgrd-bdr.c - BDR functionality for repmgrd
|
|
||||||
*
|
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <signal.h>
|
|
||||||
|
|
||||||
#include "repmgr.h"
|
|
||||||
#include "repmgrd.h"
|
|
||||||
#include "repmgrd-bdr.h"
|
|
||||||
#include "configfile.h"
|
|
||||||
|
|
||||||
|
|
||||||
static void do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node);
|
|
||||||
static void do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node);
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * do_bdr_node_check()
 *
 * Placeholder for BDR-specific node checks; currently performs no work.
 */
void
do_bdr_node_check(void)
{
	/* intentionally empty: no BDR node checks are implemented yet */
}
|
|
||||||
|
|
||||||
void
|
|
||||||
handle_sigint_bdr(SIGNAL_ARGS)
|
|
||||||
{
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("%s signal received"),
|
|
||||||
postgres_signal_arg == SIGTERM
|
|
||||||
? "TERM" : "INT");
|
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
|
|
||||||
create_event_notification(local_conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"repmgrd_shutdown",
|
|
||||||
true,
|
|
||||||
event_details.data);
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
terminate(SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
monitor_bdr(void)
|
|
||||||
{
|
|
||||||
NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER;
|
|
||||||
t_bdr_node_info bdr_node_info = T_BDR_NODE_INFO_INITIALIZER;
|
|
||||||
RecordStatus record_status;
|
|
||||||
NodeInfoListCell *cell;
|
|
||||||
instr_time log_status_interval_start;
|
|
||||||
|
|
||||||
/* sanity check local database */
|
|
||||||
log_info(_("connecting to local database \"%s\""),
|
|
||||||
config_file_options.conninfo);
|
|
||||||
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Local node must be running
|
|
||||||
*/
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
||||||
{
|
|
||||||
log_error(_("unable connect to local node (ID: %i), terminating"),
|
|
||||||
local_node_info.node_id);
|
|
||||||
log_hint(_("local node must be running before repmgrd can start"));
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_DB_CONN);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Verify that database is a BDR one TODO: check if supported BDR version?
|
|
||||||
*/
|
|
||||||
log_info(_("connected to database, checking for BDR"));
|
|
||||||
|
|
||||||
if (!is_bdr_db(local_conn, NULL))
|
|
||||||
{
|
|
||||||
log_error(_("database is not BDR-enabled"));
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check this is a supported BDR version (basically BDR 2.x)
|
|
||||||
*/
|
|
||||||
if (get_bdr_version_num() > 2)
|
|
||||||
{
|
|
||||||
log_error(_("\"bdr\" mode is for BDR 2.x only"));
|
|
||||||
log_hint(_("for BDR 3 and later, use \"replication_type=physical\""));
|
|
||||||
log_error(_("database is not BDR-enabled"));
|
|
||||||
exit(ERR_DB_CONN);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_table_in_bdr_replication_set(local_conn, "nodes", "repmgr") == false)
|
|
||||||
{
|
|
||||||
log_error(_("repmgr metadata table 'repmgr.%s' is not in the 'repmgr' replication set"),
|
|
||||||
"nodes");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO: add `repmgr bdr sync` or similar for this situation, and hint
|
|
||||||
* here
|
|
||||||
*/
|
|
||||||
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
record_status = get_bdr_node_record_by_name(local_conn, local_node_info.node_name, &bdr_node_info);
|
|
||||||
|
|
||||||
if (record_status != RECORD_FOUND)
|
|
||||||
{
|
|
||||||
log_error(_("unable to retrieve BDR record for node \"%s\", terminating"),
|
|
||||||
local_node_info.node_name);
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (local_node_info.active == false)
|
|
||||||
{
|
|
||||||
log_error(_("local node (ID: %i) is marked as inactive in repmgr"),
|
|
||||||
local_node_info.node_id);
|
|
||||||
log_hint(_("if the node has been reactivated, run \"repmgr bdr register --force\" and restart repmgrd"));
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_active_bdr_node(local_conn, local_node_info.node_name) == false)
|
|
||||||
{
|
|
||||||
log_error(_("BDR node \"%s\" is not active, terminating"),
|
|
||||||
local_node_info.node_name);
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Log startup event */
|
|
||||||
create_event_record(local_conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"repmgrd_start",
|
|
||||||
true,
|
|
||||||
NULL);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* retrieve list of all nodes - we'll need these if the DB connection goes
|
|
||||||
* away
|
|
||||||
*/
|
|
||||||
if (get_all_node_records(local_conn, &nodes) == false)
|
|
||||||
{
|
|
||||||
/* get_all_node_records() will display the error */
|
|
||||||
PQfinish(local_conn);
|
|
||||||
exit(ERR_BAD_CONFIG);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* we're expecting all (both) nodes to be up */
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
|
||||||
{
|
|
||||||
cell->node_info->node_status = NODE_STATUS_UP;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_info(_("starting continuous BDR node monitoring on node %i"),
|
|
||||||
config_file_options.node_id);
|
|
||||||
|
|
||||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
|
||||||
|
|
||||||
while (true)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* monitoring loop */
|
|
||||||
log_verbose(LOG_DEBUG, "BDR check loop - checking %i nodes", nodes.node_count);
|
|
||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
|
||||||
{
|
|
||||||
if (config_file_options.bdr_local_monitoring_only == true
|
|
||||||
&& cell->node_info->node_id != local_node_info.node_id)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cell->node_info->node_id == local_node_info.node_id)
|
|
||||||
{
|
|
||||||
log_debug("checking local node %i in %s state",
|
|
||||||
local_node_info.node_id,
|
|
||||||
print_monitoring_state(cell->node_info->monitoring_state));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
log_debug("checking other node %i in %s state",
|
|
||||||
cell->node_info->node_id,
|
|
||||||
print_monitoring_state(cell->node_info->monitoring_state));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
switch (cell->node_info->monitoring_state)
|
|
||||||
{
|
|
||||||
case MS_NORMAL:
|
|
||||||
{
|
|
||||||
if (is_server_available(cell->node_info->conninfo) == false)
|
|
||||||
{
|
|
||||||
/* node is down, we were expecting it to be up */
|
|
||||||
if (cell->node_info->node_status == NODE_STATUS_UP)
|
|
||||||
{
|
|
||||||
instr_time node_unreachable_start;
|
|
||||||
|
|
||||||
INSTR_TIME_SET_CURRENT(node_unreachable_start);
|
|
||||||
|
|
||||||
cell->node_info->node_status = NODE_STATUS_DOWN;
|
|
||||||
|
|
||||||
if (cell->node_info->conn != NULL)
|
|
||||||
{
|
|
||||||
PQfinish(cell->node_info->conn);
|
|
||||||
cell->node_info->conn = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_warning(_("unable to connect to node \"%s\" (ID %i)"),
|
|
||||||
cell->node_info->node_name, cell->node_info->node_id);
|
|
||||||
try_reconnect(&cell->node_info->conn, cell->node_info);
|
|
||||||
|
|
||||||
/* node has recovered - log and continue */
|
|
||||||
if (cell->node_info->node_status == NODE_STATUS_UP)
|
|
||||||
{
|
|
||||||
int node_unreachable_elapsed = calculate_elapsed(node_unreachable_start);
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("reconnected to node %i after %i seconds"),
|
|
||||||
cell->node_info->node_id,
|
|
||||||
node_unreachable_elapsed);
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
|
|
||||||
create_event_notification(cell->node_info->conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"bdr_reconnect",
|
|
||||||
true,
|
|
||||||
event_details.data);
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
goto loop;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* still down after reconnect attempt(s) */
|
|
||||||
if (cell->node_info->node_status == NODE_STATUS_DOWN)
|
|
||||||
{
|
|
||||||
do_bdr_failover(&nodes, cell->node_info);
|
|
||||||
goto loop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case MS_DEGRADED:
|
|
||||||
{
|
|
||||||
/* degraded monitoring */
|
|
||||||
if (is_server_available(cell->node_info->conninfo) == true)
|
|
||||||
{
|
|
||||||
do_bdr_recovery(&nodes, cell->node_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
loop:
|
|
||||||
|
|
||||||
/* emit "still alive" log message at regular intervals, if requested */
|
|
||||||
if (config_file_options.log_status_interval > 0)
|
|
||||||
{
|
|
||||||
int log_status_interval_elapsed = calculate_elapsed(log_status_interval_start);
|
|
||||||
if (log_status_interval_elapsed >= config_file_options.log_status_interval)
|
|
||||||
{
|
|
||||||
log_info(_("monitoring BDR replication status on node \"%s\" (ID: %i)"),
|
|
||||||
local_node_info.node_name,
|
|
||||||
local_node_info.node_id);
|
|
||||||
|
|
||||||
for (cell = nodes.head; cell; cell = cell->next)
|
|
||||||
{
|
|
||||||
if (cell->node_info->monitoring_state == MS_DEGRADED)
|
|
||||||
{
|
|
||||||
log_detail(_("monitoring node \"%s\" (ID: %i) in degraded mode"),
|
|
||||||
cell->node_info->node_name,
|
|
||||||
cell->node_info->node_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
INSTR_TIME_SET_CURRENT(log_status_interval_start);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (got_SIGHUP)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* if we can reload, then could need to change local_conn
|
|
||||||
*/
|
|
||||||
if (reload_config(&config_file_options, BDR))
|
|
||||||
{
|
|
||||||
PQfinish(local_conn);
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
update_registration(local_conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
got_SIGHUP = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* XXX this looks like it will never be called */
|
|
||||||
if (got_SIGHUP)
|
|
||||||
{
|
|
||||||
log_debug("SIGHUP received");
|
|
||||||
|
|
||||||
if (reload_config(&config_file_options, BDR))
|
|
||||||
{
|
|
||||||
PQfinish(local_conn);
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
|
||||||
|
|
||||||
if (*config_file_options.log_file)
|
|
||||||
{
|
|
||||||
FILE *fd;
|
|
||||||
|
|
||||||
fd = freopen(config_file_options.log_file, "a", stderr);
|
|
||||||
if (fd == NULL)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "error reopening stderr to \"%s\": %s",
|
|
||||||
config_file_options.log_file, strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
got_SIGHUP = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "sleeping %i seconds (\"monitor_interval_secs\")",
|
|
||||||
config_file_options.monitor_interval_secs);
|
|
||||||
sleep(config_file_options.monitor_interval_secs);
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* do_bdr_failover()
|
|
||||||
*
|
|
||||||
* Here we attempt to perform a BDR "failover".
|
|
||||||
*
|
|
||||||
* As there's no equivalent of a physical replication failover,
|
|
||||||
* we'll do the following:
|
|
||||||
*
|
|
||||||
* - connect to active node
|
|
||||||
* - generate an event log record on that node
|
|
||||||
* - optionally execute `bdr_failover_command`, passing the conninfo string
|
|
||||||
* of that node to the command; this can be used for e.g. reconfiguring
|
|
||||||
* pgbouncer.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void
|
|
||||||
do_bdr_failover(NodeInfoList *nodes, t_node_info *monitored_node)
|
|
||||||
{
|
|
||||||
PGconn *next_node_conn = NULL;
|
|
||||||
NodeInfoListCell *cell;
|
|
||||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
|
||||||
t_node_info target_node = T_NODE_INFO_INITIALIZER;
|
|
||||||
t_node_info failed_node = T_NODE_INFO_INITIALIZER;
|
|
||||||
RecordStatus record_status;
|
|
||||||
|
|
||||||
/* if one of the two nodes is down, cluster will be in a degraded state */
|
|
||||||
monitored_node->monitoring_state = MS_DEGRADED;
|
|
||||||
INSTR_TIME_SET_CURRENT(degraded_monitoring_start);
|
|
||||||
|
|
||||||
/* terminate local connection if this is the failed node */
|
|
||||||
if (monitored_node->node_id == local_node_info.node_id)
|
|
||||||
{
|
|
||||||
PQfinish(local_conn);
|
|
||||||
local_conn = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* get other node */
|
|
||||||
|
|
||||||
for (cell = nodes->head; cell; cell = cell->next)
|
|
||||||
{
|
|
||||||
log_debug("do_bdr_failover() %s", cell->node_info->node_name);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* don't attempt to connect to the current monitored node, as that's
|
|
||||||
* the one which has failed
|
|
||||||
*/
|
|
||||||
if (cell->node_info->node_id == monitored_node->node_id)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* TODO: reuse local conn if local node is up */
|
|
||||||
next_node_conn = establish_db_connection(cell->node_info->conninfo, false);
|
|
||||||
|
|
||||||
if (PQstatus(next_node_conn) == CONNECTION_OK)
|
|
||||||
{
|
|
||||||
record_status = get_node_record(next_node_conn,
|
|
||||||
cell->node_info->node_id,
|
|
||||||
&target_node);
|
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
next_node_conn = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* shouldn't happen, and if it does, it means everything is down */
|
|
||||||
if (next_node_conn == NULL)
|
|
||||||
{
|
|
||||||
log_error(_("no other available node found"));
|
|
||||||
|
|
||||||
/* no other nodes found - continue degraded monitoring */
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* check if the node record for the failed node is still marked as active,
|
|
||||||
* if not it means the other node has done the "failover" already
|
|
||||||
*/
|
|
||||||
|
|
||||||
record_status = get_node_record(next_node_conn,
|
|
||||||
monitored_node->node_id,
|
|
||||||
&failed_node);
|
|
||||||
|
|
||||||
if (record_status == RECORD_FOUND && failed_node.active == false)
|
|
||||||
{
|
|
||||||
PQfinish(next_node_conn);
|
|
||||||
log_notice(_("record for node %i has already been set inactive"),
|
|
||||||
failed_node.node_id);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (am_bdr_failover_handler(next_node_conn, local_node_info.node_id) == false)
|
|
||||||
{
|
|
||||||
PQfinish(next_node_conn);
|
|
||||||
log_notice(_("other node's repmgrd is handling failover"));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* check here that the node hasn't come back up */
|
|
||||||
if (is_server_available(monitored_node->conninfo) == true)
|
|
||||||
{
|
|
||||||
log_notice(_("node %i has reappeared, aborting failover"),
|
|
||||||
monitored_node->node_id);
|
|
||||||
monitored_node->monitoring_state = MS_NORMAL;
|
|
||||||
PQfinish(next_node_conn);
|
|
||||||
}
|
|
||||||
|
|
||||||
log_debug("this node is the failover handler");
|
|
||||||
|
|
||||||
{
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
event_info.conninfo_str = target_node.conninfo;
|
|
||||||
event_info.node_name = target_node.node_name;
|
|
||||||
|
|
||||||
/* update node record on the active node */
|
|
||||||
update_node_record_set_active(next_node_conn, monitored_node->node_id, false);
|
|
||||||
|
|
||||||
log_notice(_("setting node record for node %i to inactive"), monitored_node->node_id);
|
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("node \"%s\" (ID: %i) detected as failed; next available node is \"%s\" (ID: %i)"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id,
|
|
||||||
target_node.node_name,
|
|
||||||
target_node.node_id);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create an event record
|
|
||||||
*
|
|
||||||
* If we were able to connect to another node, we'll update the event log
|
|
||||||
* there.
|
|
||||||
*
|
|
||||||
* In any case the event notification command will be triggered with the
|
|
||||||
* event "bdr_failover"
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
create_event_notification_extended(next_node_conn,
|
|
||||||
&config_file_options,
|
|
||||||
monitored_node->node_id,
|
|
||||||
"bdr_failover",
|
|
||||||
true,
|
|
||||||
event_details.data,
|
|
||||||
&event_info);
|
|
||||||
|
|
||||||
log_info("%s", event_details.data);
|
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
}
|
|
||||||
|
|
||||||
unset_bdr_failover_handler(next_node_conn);
|
|
||||||
|
|
||||||
PQfinish(next_node_conn);
|
|
||||||
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
do_bdr_recovery(NodeInfoList *nodes, t_node_info *monitored_node)
|
|
||||||
{
|
|
||||||
PGconn *recovered_node_conn;
|
|
||||||
|
|
||||||
t_event_info event_info = T_EVENT_INFO_INITIALIZER;
|
|
||||||
int i;
|
|
||||||
bool slot_reactivated = false;
|
|
||||||
int node_recovery_elapsed;
|
|
||||||
|
|
||||||
char node_name[MAXLEN] = "";
|
|
||||||
|
|
||||||
log_debug("handling recovery for monitored node %i", monitored_node->node_id);
|
|
||||||
|
|
||||||
recovered_node_conn = establish_db_connection(monitored_node->conninfo, false);
|
|
||||||
|
|
||||||
if (PQstatus(recovered_node_conn) != CONNECTION_OK)
|
|
||||||
{
|
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
||||||
{
|
|
||||||
log_debug("no local connection - attempting to reconnect ");
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* still unable to connect - the local node is probably down, so we can't
|
|
||||||
* check for reconnection
|
|
||||||
*/
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
||||||
{
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
|
|
||||||
local_conn = NULL;
|
|
||||||
log_warning(_("unable to reconnect to local node"));
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
|
||||||
monitored_node->monitoring_state = MS_NORMAL;
|
|
||||||
monitored_node->node_status = NODE_STATUS_UP;
|
|
||||||
|
|
||||||
appendPQExpBuffer(
|
|
||||||
&event_details,
|
|
||||||
_("node \"%s\" (ID: %i) has become available after %i seconds"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id,
|
|
||||||
node_recovery_elapsed);
|
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
get_bdr_other_node_name(local_conn, local_node_info.node_id, node_name);
|
|
||||||
|
|
||||||
log_info(_("detected recovery on node \"%s\" (ID: %i), checking status"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id);
|
|
||||||
|
|
||||||
for (i = 0; i < config_file_options.bdr_recovery_timeout; i++)
|
|
||||||
{
|
|
||||||
ReplSlotStatus slot_status;
|
|
||||||
|
|
||||||
log_debug("checking for state of replication slot for node \"%s\"", node_name);
|
|
||||||
|
|
||||||
slot_status = get_bdr_node_replication_slot_status(
|
|
||||||
local_conn,
|
|
||||||
node_name);
|
|
||||||
|
|
||||||
if (slot_status == SLOT_ACTIVE)
|
|
||||||
{
|
|
||||||
slot_reactivated = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* mark node as up */
|
|
||||||
monitored_node->node_status = NODE_STATUS_UP;
|
|
||||||
|
|
||||||
if (slot_reactivated == false)
|
|
||||||
{
|
|
||||||
log_warning(_("no active replication slot for node \"%s\" found after %i seconds"),
|
|
||||||
node_name,
|
|
||||||
config_file_options.bdr_recovery_timeout);
|
|
||||||
log_detail(_("this probably means inter-node BDR connections have not been re-established"));
|
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
log_info(_("active replication slot for node \"%s\" found after %i seconds"),
|
|
||||||
node_name,
|
|
||||||
i);
|
|
||||||
|
|
||||||
node_recovery_elapsed = calculate_elapsed(degraded_monitoring_start);
|
|
||||||
monitored_node->monitoring_state = MS_NORMAL;
|
|
||||||
|
|
||||||
{
|
|
||||||
PQExpBufferData event_details;
|
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
|
||||||
_("node \"%s\" (ID: %i) has recovered after %i seconds"),
|
|
||||||
monitored_node->node_name,
|
|
||||||
monitored_node->node_id,
|
|
||||||
node_recovery_elapsed);
|
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
|
||||||
|
|
||||||
|
|
||||||
/* other node will generate the event */
|
|
||||||
if (monitored_node->node_id == local_node_info.node_id)
|
|
||||||
{
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/* generate the event on the currently active node only */
|
|
||||||
if (monitored_node->node_id != local_node_info.node_id)
|
|
||||||
{
|
|
||||||
event_info.conninfo_str = monitored_node->conninfo;
|
|
||||||
event_info.node_name = monitored_node->node_name;
|
|
||||||
|
|
||||||
create_event_notification_extended(local_conn,
|
|
||||||
&config_file_options,
|
|
||||||
config_file_options.node_id,
|
|
||||||
"bdr_recovery",
|
|
||||||
true,
|
|
||||||
event_details.data,
|
|
||||||
&event_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
|
||||||
}
|
|
||||||
|
|
||||||
update_node_record_set_active(local_conn, monitored_node->node_id, true);
|
|
||||||
|
|
||||||
PQfinish(recovered_node_conn);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
/*
|
|
||||||
* repmgrd-bdr.h
|
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
|
||||||
*
|
|
||||||
* This program is free software: you can redistribute it and/or modify
|
|
||||||
* it under the terms of the GNU General Public License as published by
|
|
||||||
* the Free Software Foundation, either version 3 of the License, or
|
|
||||||
* (at your option) any later version.
|
|
||||||
*
|
|
||||||
* This program is distributed in the hope that it will be useful,
|
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
* GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License
|
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _REPMGRD_BDR_H_
|
|
||||||
#define _REPMGRD_BDR_H_
|
|
||||||
|
|
||||||
extern void do_bdr_node_check(void);
|
|
||||||
extern void monitor_bdr(void);
|
|
||||||
|
|
||||||
extern void handle_sigint_bdr(SIGNAL_ARGS);
|
|
||||||
#endif /* _REPMGRD_BDR_H_ */
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd-physical.c - physical (streaming) replication functionality for repmgrd
|
* repmgrd-physical.c - physical (streaming) replication functionality for repmgrd
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -125,6 +125,8 @@ static void clear_child_node_info_list(t_child_node_info_list *nodes);
|
|||||||
static void parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id);
|
static void parse_child_nodes_disconnect_command(char *parsed_command, char *template, int reporting_node_id);
|
||||||
static void execute_child_nodes_disconnect_command(NodeInfoList *db_child_node_records, t_child_node_info_list *local_child_nodes);
|
static void execute_child_nodes_disconnect_command(NodeInfoList *db_child_node_records, t_child_node_info_list *local_child_nodes);
|
||||||
|
|
||||||
|
static int try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info);
|
||||||
|
|
||||||
void
|
void
|
||||||
handle_sigint_physical(SIGNAL_ARGS)
|
handle_sigint_physical(SIGNAL_ARGS)
|
||||||
{
|
{
|
||||||
@@ -1326,6 +1328,7 @@ monitor_streaming_standby(void)
|
|||||||
*/
|
*/
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&upstream_conn);
|
||||||
log_error(_("unable connect to upstream node (ID: %i), terminating"),
|
log_error(_("unable connect to upstream node (ID: %i), terminating"),
|
||||||
local_node_info.upstream_node_id);
|
local_node_info.upstream_node_id);
|
||||||
log_hint(_("upstream node must be running before repmgrd can start"));
|
log_hint(_("upstream node must be running before repmgrd can start"));
|
||||||
@@ -1337,8 +1340,8 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (upstream_node_info.node_id == local_node_info.node_id)
|
if (upstream_node_info.node_id == local_node_info.node_id)
|
||||||
{
|
{
|
||||||
PQfinish(upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
upstream_conn = NULL;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1362,6 +1365,8 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
log_error(_("unable to connect to primary node"));
|
log_error(_("unable to connect to primary node"));
|
||||||
log_hint(_("ensure the primary node is reachable from this node"));
|
log_hint(_("ensure the primary node is reachable from this node"));
|
||||||
|
|
||||||
@@ -1438,6 +1443,7 @@ monitor_streaming_standby(void)
|
|||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
log_verbose(LOG_DEBUG, "checking %s", upstream_node_info.conninfo);
|
||||||
|
|
||||||
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
if (check_upstream_connection(&upstream_conn, upstream_node_info.conninfo) == true)
|
||||||
{
|
{
|
||||||
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
set_upstream_last_seen(local_conn, upstream_node_info.node_id);
|
||||||
@@ -1497,7 +1503,34 @@ monitor_streaming_standby(void)
|
|||||||
check_connection(&local_node_info, &local_conn);
|
check_connection(&local_node_info, &local_conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
try_reconnect(&upstream_conn, &upstream_node_info);
|
|
||||||
|
if (upstream_node_info.type == PRIMARY)
|
||||||
|
{
|
||||||
|
primary_node_id = try_primary_reconnect(&upstream_conn, local_conn, &upstream_node_info);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We were notified by the primary during our own reconnection
|
||||||
|
* retry phase, in which case we can leave the failover process early
|
||||||
|
* and connect to the new primary.
|
||||||
|
*/
|
||||||
|
if (primary_node_id == ELECTION_RERUN_NOTIFICATION)
|
||||||
|
{
|
||||||
|
if (do_primary_failover() == true)
|
||||||
|
{
|
||||||
|
primary_node_id = get_primary_node_id(local_conn);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (primary_node_id != UNKNOWN_NODE_ID && primary_node_id != ELECTION_RERUN_NOTIFICATION)
|
||||||
|
{
|
||||||
|
follow_new_primary(primary_node_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
try_reconnect(&upstream_conn, &upstream_node_info);
|
||||||
|
}
|
||||||
|
|
||||||
/* Upstream node has recovered - log and continue */
|
/* Upstream node has recovered - log and continue */
|
||||||
if (upstream_node_info.node_status == NODE_STATUS_UP)
|
if (upstream_node_info.node_status == NODE_STATUS_UP)
|
||||||
@@ -1528,8 +1561,9 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(upstream_conn);
|
|
||||||
upstream_conn = NULL;
|
close_connection(&upstream_conn);
|
||||||
|
|
||||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
/* check local connection */
|
/* check local connection */
|
||||||
@@ -1539,7 +1573,7 @@ monitor_streaming_standby(void)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
PQfinish(local_conn);
|
close_connection(&local_conn);
|
||||||
|
|
||||||
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
|
for (i = 0; i < config_file_options.repmgrd_standby_startup_timeout; i++)
|
||||||
{
|
{
|
||||||
@@ -1548,6 +1582,8 @@ monitor_streaming_standby(void)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
@@ -1677,7 +1713,12 @@ monitor_streaming_standby(void)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (primary_conn == NULL || PQstatus(primary_conn) != CONNECTION_OK)
|
if (primary_conn != NULL && PQstatus(primary_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (primary_conn == NULL)
|
||||||
{
|
{
|
||||||
primary_conn = establish_primary_db_connection(upstream_conn, false);
|
primary_conn = establish_primary_db_connection(upstream_conn, false);
|
||||||
}
|
}
|
||||||
@@ -1686,7 +1727,8 @@ monitor_streaming_standby(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to upstream node %i after %i seconds, resuming monitoring"),
|
_("reconnected to upstream node \"%s\" (ID: %i) after %i seconds, resuming monitoring"),
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
@@ -1822,7 +1864,9 @@ monitor_streaming_standby(void)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2007,7 +2051,8 @@ loop:
|
|||||||
|
|
||||||
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i upstream appears to have changed, restarting monitoring"),
|
log_notice(_("upstream for local node \"%s\" (ID: %i) appears to have changed, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
||||||
last_known_upstream_node_id,
|
last_known_upstream_node_id,
|
||||||
@@ -2021,7 +2066,8 @@ loop:
|
|||||||
*/
|
*/
|
||||||
if (local_node_info.type != STANDBY)
|
if (local_node_info.type != STANDBY)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i is no longer a standby, restarting monitoring"),
|
log_notice(_("local node \"%s\" (ID: %i) is no longer a standby, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
close_connection(&upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
return;
|
return;
|
||||||
@@ -2046,8 +2092,8 @@ loop:
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
local_node_info.upstream_node_id = UNKNOWN_NODE_ID;
|
||||||
return;
|
return;
|
||||||
@@ -2115,7 +2161,8 @@ loop:
|
|||||||
|
|
||||||
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
if (last_known_upstream_node_id != local_node_info.upstream_node_id)
|
||||||
{
|
{
|
||||||
log_notice(_("local node %i's upstream appears to have changed, restarting monitoring"),
|
log_notice(_("local node \"%s\" (ID: %i)'s upstream appears to have changed, restarting monitoring"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
log_detail(_("currently monitoring upstream %i; new upstream is %i"),
|
||||||
last_known_upstream_node_id,
|
last_known_upstream_node_id,
|
||||||
@@ -2306,8 +2353,9 @@ monitor_streaming_witness(void)
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2368,7 +2416,8 @@ monitor_streaming_witness(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("reconnected to upstream node %i after %i seconds, resuming monitoring"),
|
_("reconnected to upstream node \"%s\" (ID: %i) after %i seconds, resuming monitoring"),
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id,
|
upstream_node_info.node_id,
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
@@ -2378,9 +2427,11 @@ monitor_streaming_witness(void)
|
|||||||
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
if (get_recovery_type(primary_conn) != RECTYPE_PRIMARY)
|
||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name,
|
||||||
PQfinish(primary_conn);
|
upstream_node_info.node_id);
|
||||||
primary_conn = NULL;
|
|
||||||
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2434,7 +2485,9 @@ monitor_streaming_witness(void)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
log_debug("unable to connect to %i ... ", cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2576,8 +2629,9 @@ loop:
|
|||||||
{
|
{
|
||||||
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
log_notice(_("current upstream node \"%s\" (ID: %i) is not primary, restarting monitoring"),
|
||||||
upstream_node_info.node_name, upstream_node_info.node_id);
|
upstream_node_info.node_name, upstream_node_info.node_id);
|
||||||
PQfinish(primary_conn);
|
|
||||||
primary_conn = NULL;
|
close_connection(&primary_conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2632,7 +2686,6 @@ loop:
|
|||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2688,24 +2741,32 @@ do_primary_failover(void)
|
|||||||
{
|
{
|
||||||
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
for (cell = check_sibling_nodes.head; cell; cell = cell->next)
|
||||||
{
|
{
|
||||||
pid_t sibling_wal_receiver_pid;
|
|
||||||
|
|
||||||
if (cell->node_info->conn == NULL)
|
if (cell->node_info->conn == NULL)
|
||||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
|
|
||||||
sibling_wal_receiver_pid = (pid_t)get_wal_receiver_pid(cell->node_info->conn);
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
|
|
||||||
if (sibling_wal_receiver_pid == UNKNOWN_PID)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to query WAL receiver PID on node %i"),
|
log_warning(_("unable to query WAL receiver PID on node \"%s\" (ID: %i)"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
}
|
}
|
||||||
else if (sibling_wal_receiver_pid > 0)
|
else
|
||||||
{
|
{
|
||||||
log_info(_("WAL receiver PID on node %i is %i"),
|
pid_t sibling_wal_receiver_pid = (pid_t)get_wal_receiver_pid(cell->node_info->conn);
|
||||||
cell->node_info->node_id,
|
|
||||||
sibling_wal_receiver_pid);
|
if (sibling_wal_receiver_pid == UNKNOWN_PID)
|
||||||
sibling_node_wal_receiver_connected = true;
|
{
|
||||||
|
log_warning(_("unable to query WAL receiver PID on node %i"),
|
||||||
|
cell->node_info->node_id);
|
||||||
|
}
|
||||||
|
else if (sibling_wal_receiver_pid > 0)
|
||||||
|
{
|
||||||
|
log_info(_("WAL receiver PID on node %i is %i"),
|
||||||
|
cell->node_info->node_id,
|
||||||
|
sibling_wal_receiver_pid);
|
||||||
|
sibling_node_wal_receiver_connected = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2867,7 +2928,8 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i is in manual failover mode and is now disconnected from streaming replication"),
|
_("node \"%s\" (ID: %i) is in manual failover mode and is now disconnected from streaming replication"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
|
|
||||||
new_primary_conn = establish_db_connection(new_primary.conninfo, false);
|
new_primary_conn = establish_db_connection(new_primary.conninfo, false);
|
||||||
@@ -3027,8 +3089,6 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
update_monitoring_history(void)
|
update_monitoring_history(void)
|
||||||
{
|
{
|
||||||
@@ -3263,6 +3323,8 @@ do_upstream_standby_failover(void)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i (\"repmgrd_standby_startup_timeout\") attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i (\"repmgrd_standby_startup_timeout\") attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
@@ -3271,7 +3333,8 @@ do_upstream_standby_failover(void)
|
|||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node %i"),
|
log_error(_("unable to reconnect to local node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3306,7 +3369,8 @@ do_upstream_standby_failover(void)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("unable to set node %i's new upstream ID to %i"),
|
_("unable to set node \"%s\" (ID: %i)'s new upstream ID to %i"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
primary_node_info.node_id);
|
primary_node_info.node_id);
|
||||||
|
|
||||||
@@ -3344,8 +3408,10 @@ do_upstream_standby_failover(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i is now following primary node %i"),
|
_("node \"%s\" (ID: %i) is now following primary node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
primary_node_info.node_name,
|
||||||
primary_node_info.node_id);
|
primary_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -3410,12 +3476,16 @@ promote_self(void)
|
|||||||
|
|
||||||
r = system(promote_command);
|
r = system(promote_command);
|
||||||
|
|
||||||
|
log_debug("result of promote_command: %i", WEXITSTATUS(r));
|
||||||
|
|
||||||
/* connection should stay up, but check just in case */
|
/* connection should stay up, but check just in case */
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(_("local database connection not available"));
|
log_warning(_("local database connection not available"));
|
||||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
local_conn = establish_db_connection(local_node_info.conninfo, true);
|
||||||
|
|
||||||
/* assume node failed */
|
/* assume node failed */
|
||||||
@@ -3423,24 +3493,37 @@ promote_self(void)
|
|||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node"));
|
log_error(_("unable to reconnect to local node"));
|
||||||
log_detail("\n%s", PQerrorMessage(local_conn));
|
log_detail("\n%s", PQerrorMessage(local_conn));
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
/* XXX handle this */
|
/* XXX handle this */
|
||||||
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
return FAILOVER_STATE_LOCAL_NODE_FAILURE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (r != 0)
|
if (WIFEXITED(r) && WEXITSTATUS(r))
|
||||||
{
|
{
|
||||||
int primary_node_id;
|
int primary_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
|
log_error(_("promote command failed"));
|
||||||
|
log_detail(_("promote command exited with error code %i"), WEXITSTATUS(r));
|
||||||
|
|
||||||
|
log_info(_("checking if original primary node has reappeared"));
|
||||||
|
|
||||||
upstream_conn = get_primary_connection(local_conn,
|
upstream_conn = get_primary_connection(local_conn,
|
||||||
&primary_node_id,
|
&primary_node_id,
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) == CONNECTION_OK && primary_node_id == failed_primary.node_id)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
close_connection(&upstream_conn);
|
||||||
|
}
|
||||||
|
else if (primary_node_id == failed_primary.node_id)
|
||||||
{
|
{
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
log_notice(_("original primary (ID: %i) reappeared before this standby was promoted - no action taken"),
|
log_notice(_("original primary \"%s\" (ID: %i) reappeared before this standby was promoted - no action taken"),
|
||||||
|
failed_primary.node_name,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
@@ -3465,9 +3548,6 @@ promote_self(void)
|
|||||||
return FAILOVER_STATE_PRIMARY_REAPPEARED;
|
return FAILOVER_STATE_PRIMARY_REAPPEARED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
log_error(_("promote command failed"));
|
|
||||||
|
|
||||||
create_event_notification(NULL,
|
create_event_notification(NULL,
|
||||||
&config_file_options,
|
&config_file_options,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
@@ -3478,6 +3558,11 @@ promote_self(void)
|
|||||||
return FAILOVER_STATE_PROMOTION_FAILED;
|
return FAILOVER_STATE_PROMOTION_FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Promotion has succeeded - verify local connection is still available
|
||||||
|
*/
|
||||||
|
try_reconnect(&local_conn, &local_node_info);
|
||||||
|
|
||||||
/* bump the electoral term */
|
/* bump the electoral term */
|
||||||
increment_current_term(local_conn);
|
increment_current_term(local_conn);
|
||||||
|
|
||||||
@@ -3493,8 +3578,10 @@ promote_self(void)
|
|||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i promoted to primary; old primary %i marked as failed"),
|
_("node \"%s\" (ID: %i) promoted to primary; old primary \"%s\" (ID: %i) marked as failed"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
failed_primary.node_name,
|
||||||
failed_primary.node_id);
|
failed_primary.node_id);
|
||||||
|
|
||||||
/* local_conn is now the primary connection */
|
/* local_conn is now the primary connection */
|
||||||
@@ -3538,6 +3625,8 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
|||||||
cell->node_info->node_name,
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
|
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
|
|
||||||
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
cell->node_info->conn = establish_db_connection(cell->node_info->conninfo, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3548,6 +3637,7 @@ notify_followers(NodeInfoList *standby_nodes, int follow_node_id)
|
|||||||
cell->node_info->node_id);
|
cell->node_info->node_id);
|
||||||
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
log_detail("\n%s", PQerrorMessage(cell->node_info->conn));
|
||||||
|
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3655,7 +3745,9 @@ follow_new_primary(int new_primary_id)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
new_primary_ok = false;
|
new_primary_ok = false;
|
||||||
log_warning(_("new primary is not in recovery"));
|
log_warning(_("new primary \"%s\" (node ID: %i) is in recovery"),
|
||||||
|
new_primary.node_name,
|
||||||
|
new_primary_id);
|
||||||
close_connection(&upstream_conn);
|
close_connection(&upstream_conn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3770,15 +3862,18 @@ follow_new_primary(int new_primary_id)
|
|||||||
if (PQstatus(local_conn) == CONNECTION_OK)
|
if (PQstatus(local_conn) == CONNECTION_OK)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
close_connection(&local_conn);
|
||||||
|
|
||||||
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
log_debug("sleeping 1 second; %i of %i attempts to reconnect to local node",
|
||||||
i + 1,
|
i + 1,
|
||||||
config_file_options.repmgrd_standby_startup_timeout);
|
config_file_options.repmgrd_standby_startup_timeout);
|
||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(local_conn) != CONNECTION_OK)
|
if (local_conn == NULL || PQstatus(local_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to reconnect to local node %i"),
|
log_error(_("unable to reconnect to local node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3792,8 +3887,10 @@ follow_new_primary(int new_primary_id)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i now following new upstream node %i"),
|
_("node \"%s\" (ID: %i) now following new upstream node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id);
|
upstream_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -3848,7 +3945,9 @@ witness_follow_new_primary(int new_primary_id)
|
|||||||
break;
|
break;
|
||||||
case RECTYPE_STANDBY:
|
case RECTYPE_STANDBY:
|
||||||
new_primary_ok = false;
|
new_primary_ok = false;
|
||||||
log_warning(_("new primary is not in recovery"));
|
log_warning(_("new primary \"%s\" (node ID: %i) is in recovery"),
|
||||||
|
new_primary.node_name,
|
||||||
|
new_primary_id);
|
||||||
break;
|
break;
|
||||||
case RECTYPE_UNKNOWN:
|
case RECTYPE_UNKNOWN:
|
||||||
new_primary_ok = false;
|
new_primary_ok = false;
|
||||||
@@ -3886,7 +3985,7 @@ witness_follow_new_primary(int new_primary_id)
|
|||||||
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
|
record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info);
|
||||||
if (record_status != RECORD_FOUND)
|
if (record_status != RECORD_FOUND)
|
||||||
{
|
{
|
||||||
log_error(_("unable to retrieve metadata record found for node %i"),
|
log_error(_("unable to retrieve metadata record for node %i"),
|
||||||
local_node_info.node_id);
|
local_node_info.node_id);
|
||||||
return FAILOVER_STATE_FOLLOW_FAIL;
|
return FAILOVER_STATE_FOLLOW_FAIL;
|
||||||
}
|
}
|
||||||
@@ -3896,8 +3995,10 @@ witness_follow_new_primary(int new_primary_id)
|
|||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("witness node %i now following new primary node %i"),
|
_("witness node \"%s\" (ID: %i) now following new primary node \"%s\" (ID: %i)"),
|
||||||
|
local_node_info.node_name,
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
|
upstream_node_info.node_name,
|
||||||
upstream_node_info.node_id);
|
upstream_node_info.node_id);
|
||||||
|
|
||||||
log_notice("%s", event_details.data);
|
log_notice("%s", event_details.data);
|
||||||
@@ -3999,6 +4100,12 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
|
|
||||||
return ELECTION_NOT_CANDIDATE;
|
return ELECTION_NOT_CANDIDATE;
|
||||||
}
|
}
|
||||||
|
if (config_file_options.failover_delay > 0)
|
||||||
|
{
|
||||||
|
log_debug("sleeping %i seconds (\"failover_delay\") before initiating failover",
|
||||||
|
config_file_options.failover_delay);
|
||||||
|
sleep(config_file_options.failover_delay);
|
||||||
|
}
|
||||||
|
|
||||||
/* node priority is set to zero - don't become a candidate, and lose by default */
|
/* node priority is set to zero - don't become a candidate, and lose by default */
|
||||||
if (local_node_info.priority <= 0)
|
if (local_node_info.priority <= 0)
|
||||||
@@ -4029,7 +4136,9 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_info(_("primary and this node have the same location (\"%s\")"),
|
log_info(_("primary node \"%s\" (ID: %i) and this node have the same location (\"%s\")"),
|
||||||
|
upstream_node_info.node_name,
|
||||||
|
upstream_node_info.node_id,
|
||||||
local_node_info.location);
|
local_node_info.location);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4131,6 +4240,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
|
|
||||||
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
if (PQstatus(cell->node_info->conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
close_connection(&cell->node_info->conn);
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4261,7 +4372,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
{
|
{
|
||||||
if (sibling_replication_info.upstream_node_id != upstream_node_info.node_id)
|
if (sibling_replication_info.upstream_node_id != upstream_node_info.node_id)
|
||||||
{
|
{
|
||||||
log_warning(_("assumed sibling node %i monitoring different upstream node %i"),
|
log_warning(_("assumed sibling node \"%s\" (ID: %i) monitoring different upstream node %i"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
sibling_replication_info.upstream_node_id);
|
sibling_replication_info.upstream_node_id);
|
||||||
|
|
||||||
@@ -4269,7 +4381,9 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
nodes_with_primary_still_visible++;
|
nodes_with_primary_still_visible++;
|
||||||
log_notice(_("node %i last saw primary node %i second(s) ago, considering primary still visible"),
|
log_notice(_("%s node \"%s\" (ID: %i) last saw primary node %i second(s) ago, considering primary still visible"),
|
||||||
|
get_node_type_string(cell->node_info->type),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
sibling_replication_info.upstream_last_seen);
|
sibling_replication_info.upstream_last_seen);
|
||||||
appendPQExpBuffer(&nodes_with_primary_visible,
|
appendPQExpBuffer(&nodes_with_primary_visible,
|
||||||
@@ -4281,7 +4395,9 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_info(_("node %i last saw primary node %i second(s) ago"),
|
log_info(_("%s node \"%s\" (ID: %i) last saw primary node %i second(s) ago"),
|
||||||
|
get_node_type_string(cell->node_info->type),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
sibling_replication_info.upstream_last_seen);
|
sibling_replication_info.upstream_last_seen);
|
||||||
}
|
}
|
||||||
@@ -4297,7 +4413,8 @@ do_election(NodeInfoList *sibling_nodes, int *new_primary_id)
|
|||||||
/* don't check 0-priority nodes */
|
/* don't check 0-priority nodes */
|
||||||
if (cell->node_info->priority <= 0)
|
if (cell->node_info->priority <= 0)
|
||||||
{
|
{
|
||||||
log_info(_("node %i has priority of %i, skipping"),
|
log_info(_("node \"%s\" (ID: %i) has priority of %i, skipping"),
|
||||||
|
cell->node_info->node_name,
|
||||||
cell->node_info->node_id,
|
cell->node_info->node_id,
|
||||||
cell->node_info->priority);
|
cell->node_info->priority);
|
||||||
continue;
|
continue;
|
||||||
@@ -4558,8 +4675,8 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
|||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
log_detail("\n%s", PQerrorMessage(*conn));
|
log_detail("\n%s", PQerrorMessage(*conn));
|
||||||
PQfinish(*conn);
|
|
||||||
*conn = NULL;
|
close_connection(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
@@ -4568,13 +4685,14 @@ check_connection(t_node_info *node_info, PGconn **conn)
|
|||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
|
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
|
|
||||||
*conn = establish_db_connection(node_info->conninfo, false);
|
*conn = establish_db_connection(node_info->conninfo, false);
|
||||||
|
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
*conn = NULL;
|
|
||||||
log_warning(_("reconnection to node \"%s\" (ID: %i) failed"),
|
log_warning(_("reconnection to node \"%s\" (ID: %i) failed"),
|
||||||
node_info->node_name,
|
node_info->node_name,
|
||||||
node_info->node_id);
|
node_info->node_id);
|
||||||
@@ -4648,7 +4766,8 @@ handle_sighup(PGconn **conn, t_server_type server_type)
|
|||||||
|
|
||||||
if (reload_config(&config_file_options, server_type))
|
if (reload_config(&config_file_options, server_type))
|
||||||
{
|
{
|
||||||
PQfinish(*conn);
|
close_connection(conn);
|
||||||
|
|
||||||
*conn = establish_db_connection(config_file_options.conninfo, true);
|
*conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4773,11 +4892,9 @@ parse_failover_validation_command(const char *template, t_node_info *node_info,
|
|||||||
static bool
|
static bool
|
||||||
check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_info)
|
check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_info)
|
||||||
{
|
{
|
||||||
t_conninfo_param_list local_repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
||||||
PGconn *local_repl_conn = NULL;
|
PGconn *local_repl_conn = NULL;
|
||||||
t_system_identification local_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
t_system_identification local_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
||||||
|
|
||||||
t_conninfo_param_list follow_target_repl_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
||||||
PGconn *follow_target_repl_conn = NULL;
|
PGconn *follow_target_repl_conn = NULL;
|
||||||
t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
t_system_identification follow_target_identification = T_SYSTEM_IDENTIFICATION_INITIALIZER;
|
||||||
TimeLineHistoryEntry *follow_target_history = NULL;
|
TimeLineHistoryEntry *follow_target_history = NULL;
|
||||||
@@ -4785,23 +4902,7 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
bool can_follow = true;
|
bool can_follow = true;
|
||||||
bool success;
|
bool success;
|
||||||
|
|
||||||
/* Check local replication connection - we want to execute IDENTIFY_SYSTEM
|
local_repl_conn = establish_replication_connection_from_conn(local_conn, local_node_info.repluser);
|
||||||
* to get the current timeline ID, which might not yet be written to
|
|
||||||
* pg_control.
|
|
||||||
*
|
|
||||||
* TODO: from 9.6, query "pg_stat_wal_receiver" via the existing local connection
|
|
||||||
*/
|
|
||||||
|
|
||||||
initialize_conninfo_params(&local_repl_conninfo, false);
|
|
||||||
|
|
||||||
conn_to_param_list(local_conn, &local_repl_conninfo);
|
|
||||||
|
|
||||||
/* Set the replication user from the node record */
|
|
||||||
param_set(&local_repl_conninfo, "user", local_node_info.repluser);
|
|
||||||
param_set(&local_repl_conninfo, "replication", "1");
|
|
||||||
|
|
||||||
local_repl_conn = establish_db_connection_by_params(&local_repl_conninfo, false);
|
|
||||||
free_conninfo_params(&local_repl_conninfo);
|
|
||||||
|
|
||||||
if (PQstatus(local_repl_conn) != CONNECTION_OK)
|
if (PQstatus(local_repl_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -4810,6 +4911,7 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
success = identify_system(local_repl_conn, &local_identification);
|
success = identify_system(local_repl_conn, &local_identification);
|
||||||
PQfinish(local_repl_conn);
|
PQfinish(local_repl_conn);
|
||||||
|
|
||||||
@@ -4821,25 +4923,13 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check replication connection */
|
/* check replication connection */
|
||||||
initialize_conninfo_params(&follow_target_repl_conninfo, false);
|
follow_target_repl_conn = establish_replication_connection_from_conn(follow_target_conn,
|
||||||
|
follow_target_node_info->repluser);
|
||||||
conn_to_param_list(follow_target_conn, &follow_target_repl_conninfo);
|
|
||||||
|
|
||||||
if (strcmp(param_get(&follow_target_repl_conninfo, "user"), follow_target_node_info->repluser) != 0)
|
|
||||||
{
|
|
||||||
param_set(&follow_target_repl_conninfo, "user", follow_target_node_info->repluser);
|
|
||||||
param_set(&follow_target_repl_conninfo, "dbname", "replication");
|
|
||||||
}
|
|
||||||
|
|
||||||
param_set(&follow_target_repl_conninfo, "replication", "1");
|
|
||||||
|
|
||||||
follow_target_repl_conn = establish_db_connection_by_params(&follow_target_repl_conninfo, false);
|
|
||||||
|
|
||||||
free_conninfo_params(&follow_target_repl_conninfo);
|
|
||||||
|
|
||||||
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
if (PQstatus(follow_target_repl_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_error(_("unable to establish a replication connection to the follow target node"));
|
log_error(_("unable to establish a replication connection to the follow target node"));
|
||||||
|
|
||||||
|
PQfinish(follow_target_repl_conn);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4971,7 +5061,6 @@ check_node_can_follow(PGconn *local_conn, XLogRecPtr local_xlogpos, PGconn *foll
|
|||||||
if (follow_target_history)
|
if (follow_target_history)
|
||||||
pfree(follow_target_history);
|
pfree(follow_target_history);
|
||||||
|
|
||||||
|
|
||||||
return can_follow;
|
return can_follow;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5140,3 +5229,126 @@ parse_child_nodes_disconnect_command(char *parsed_command, char *template, int r
|
|||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
try_primary_reconnect(PGconn **conn, PGconn *local_conn, t_node_info *node_info)
|
||||||
|
{
|
||||||
|
t_conninfo_param_list conninfo_params = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
||||||
|
int i;
|
||||||
|
int max_attempts = config_file_options.reconnect_attempts;
|
||||||
|
|
||||||
|
initialize_conninfo_params(&conninfo_params, false);
|
||||||
|
|
||||||
|
/* we assume by now the conninfo string is parseable */
|
||||||
|
(void) parse_conninfo_string(node_info->conninfo, &conninfo_params, NULL, false);
|
||||||
|
|
||||||
|
/* set some default values if not explicitly provided */
|
||||||
|
param_set_ine(&conninfo_params, "connect_timeout", "2");
|
||||||
|
param_set_ine(&conninfo_params, "fallback_application_name", "repmgr");
|
||||||
|
|
||||||
|
for (i = 0; i < max_attempts; i++)
|
||||||
|
{
|
||||||
|
log_info(_("checking state of node \"%s\" (ID: %i), %i of %i attempts"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
i + 1, max_attempts);
|
||||||
|
|
||||||
|
if (is_server_available_params(&conninfo_params) == true)
|
||||||
|
{
|
||||||
|
PGconn *our_conn;
|
||||||
|
|
||||||
|
log_notice(_("node \"%s\" (ID: %i) has recovered, reconnecting"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: we could also handle the case where node is pingable but
|
||||||
|
* connection denied due to connection exhaustion, by falling back to
|
||||||
|
* degraded monitoring (make configurable)
|
||||||
|
*/
|
||||||
|
our_conn = establish_db_connection_by_params(&conninfo_params, false);
|
||||||
|
|
||||||
|
if (PQstatus(our_conn) == CONNECTION_OK)
|
||||||
|
{
|
||||||
|
free_conninfo_params(&conninfo_params);
|
||||||
|
|
||||||
|
log_info(_("connection to node \"%s\" (ID: %i) succeeded"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_BAD)
|
||||||
|
{
|
||||||
|
log_verbose(LOG_INFO, _("original connection handle returned CONNECTION_BAD, using new connection"));
|
||||||
|
close_connection(conn);
|
||||||
|
*conn = our_conn;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ExecStatusType ping_result;
|
||||||
|
|
||||||
|
ping_result = connection_ping(*conn);
|
||||||
|
|
||||||
|
if (ping_result != PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
log_info(_("original connection no longer available, using new connection"));
|
||||||
|
close_connection(conn);
|
||||||
|
*conn = our_conn;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info(_("original connection is still available"));
|
||||||
|
|
||||||
|
PQfinish(our_conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
node_info->node_status = NODE_STATUS_UP;
|
||||||
|
|
||||||
|
return UNKNOWN_NODE_ID;
|
||||||
|
}
|
||||||
|
|
||||||
|
close_connection(&our_conn);
|
||||||
|
log_notice(_("unable to reconnect to node \"%s\" (ID: %i)"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i + 1 < max_attempts)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
log_info(_("sleeping %i seconds until next reconnection attempt"),
|
||||||
|
config_file_options.reconnect_interval);
|
||||||
|
for (j = 0; j < config_file_options.reconnect_interval; j++)
|
||||||
|
{
|
||||||
|
int new_primary_node_id;
|
||||||
|
if (get_new_primary(local_conn, &new_primary_node_id) == true && new_primary_node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
if (new_primary_node_id == ELECTION_RERUN_NOTIFICATION)
|
||||||
|
{
|
||||||
|
log_notice(_("received rerun notification"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_notice(_("received notification that new primary is node %i"), new_primary_node_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
free_conninfo_params(&conninfo_params);
|
||||||
|
return new_primary_node_id;
|
||||||
|
}
|
||||||
|
sleep(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log_warning(_("unable to reconnect to node \"%s\" (ID: %i) after %i attempts"),
|
||||||
|
node_info->node_name,
|
||||||
|
node_info->node_id,
|
||||||
|
max_attempts);
|
||||||
|
|
||||||
|
node_info->node_status = NODE_STATUS_DOWN;
|
||||||
|
|
||||||
|
free_conninfo_params(&conninfo_params);
|
||||||
|
|
||||||
|
return UNKNOWN_NODE_ID;
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd-physical.h
|
* repmgrd-physical.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
82
repmgrd.c
82
repmgrd.c
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd.c - Replication manager daemon
|
* repmgrd.c - Replication manager daemon
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@@ -26,7 +26,6 @@
|
|||||||
#include "repmgr.h"
|
#include "repmgr.h"
|
||||||
#include "repmgrd.h"
|
#include "repmgrd.h"
|
||||||
#include "repmgrd-physical.h"
|
#include "repmgrd-physical.h"
|
||||||
#include "repmgrd-bdr.h"
|
|
||||||
#include "configfile.h"
|
#include "configfile.h"
|
||||||
#include "voting.h"
|
#include "voting.h"
|
||||||
|
|
||||||
@@ -484,9 +483,6 @@ main(int argc, char **argv)
|
|||||||
case REPLICATION_TYPE_PHYSICAL:
|
case REPLICATION_TYPE_PHYSICAL:
|
||||||
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
log_hint(_("check that 'repmgr (primary|standby) register' was executed for this node"));
|
||||||
break;
|
break;
|
||||||
case REPLICATION_TYPE_BDR:
|
|
||||||
log_hint(_("check that 'repmgr bdr register' was executed for this node"));
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
close_connection(&local_conn);
|
close_connection(&local_conn);
|
||||||
@@ -513,12 +509,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (config_file_options.replication_type == REPLICATION_TYPE_BDR)
|
if (config_file_options.replication_type == REPLICATION_TYPE_PHYSICAL)
|
||||||
{
|
|
||||||
log_debug("node id is %i", local_node_info.node_id);
|
|
||||||
do_bdr_node_check();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
log_debug("node id is %i, upstream node id is %i",
|
log_debug("node id is %i, upstream node id is %i",
|
||||||
local_node_info.node_id,
|
local_node_info.node_id,
|
||||||
@@ -526,8 +517,6 @@ main(int argc, char **argv)
|
|||||||
do_physical_node_check();
|
do_physical_node_check();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (daemonize == true)
|
if (daemonize == true)
|
||||||
{
|
{
|
||||||
daemonize_process();
|
daemonize_process();
|
||||||
@@ -576,9 +565,6 @@ start_monitoring(void)
|
|||||||
case WITNESS:
|
case WITNESS:
|
||||||
monitor_streaming_witness();
|
monitor_streaming_witness();
|
||||||
break;
|
break;
|
||||||
case BDR:
|
|
||||||
monitor_bdr();
|
|
||||||
return;
|
|
||||||
case UNKNOWN:
|
case UNKNOWN:
|
||||||
/* should never happen */
|
/* should never happen */
|
||||||
break;
|
break;
|
||||||
@@ -771,10 +757,6 @@ setup_event_handlers(void)
|
|||||||
*/
|
*/
|
||||||
switch (config_file_options.replication_type)
|
switch (config_file_options.replication_type)
|
||||||
{
|
{
|
||||||
case REPLICATION_TYPE_BDR:
|
|
||||||
pqsignal(SIGINT, handle_sigint_bdr);
|
|
||||||
pqsignal(SIGTERM, handle_sigint_bdr);
|
|
||||||
break;
|
|
||||||
case REPLICATION_TYPE_PHYSICAL:
|
case REPLICATION_TYPE_PHYSICAL:
|
||||||
pqsignal(SIGINT, handle_sigint_physical);
|
pqsignal(SIGINT, handle_sigint_physical);
|
||||||
pqsignal(SIGTERM, handle_sigint_physical);
|
pqsignal(SIGTERM, handle_sigint_physical);
|
||||||
@@ -835,32 +817,65 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
/* Check the connection status twice in case it changes after reset */
|
/* Check the connection status twice in case it changes after reset */
|
||||||
bool twice = false;
|
bool twice = false;
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_PING)
|
|
||||||
return is_server_available(conninfo);
|
|
||||||
|
|
||||||
if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
log_debug("connection check type is \"%s\"",
|
||||||
|
print_connection_check_type(config_file_options.connection_check_type));
|
||||||
|
/*
|
||||||
|
* For the check types which do not involve using the existing database
|
||||||
|
* connection, we'll perform the actual check, then as an additional
|
||||||
|
* safeguard verify that the connection is still valid (as it might have
|
||||||
|
* gone away during a brief outage between checks).
|
||||||
|
*/
|
||||||
|
if (config_file_options.connection_check_type != CHECK_QUERY)
|
||||||
{
|
{
|
||||||
bool success = true;
|
bool success = true;
|
||||||
PGconn *test_conn = PQconnectdb(conninfo);
|
|
||||||
|
|
||||||
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
if (config_file_options.connection_check_type == CHECK_PING)
|
||||||
|
|
||||||
if (PQstatus(test_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
log_warning(_("unable to connect to \"%s\""), conninfo);
|
success = is_server_available(conninfo);
|
||||||
log_detail("\n%s", PQerrorMessage(test_conn));
|
|
||||||
success = false;
|
|
||||||
}
|
}
|
||||||
PQfinish(test_conn);
|
else if (config_file_options.connection_check_type == CHECK_CONNECTION)
|
||||||
|
{
|
||||||
|
PGconn *test_conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
return success;
|
log_debug("check_upstream_connection(): attempting to connect to \"%s\"", conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(test_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to connect to \"%s\""), conninfo);
|
||||||
|
log_detail("\n%s", PQerrorMessage(test_conn));
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
PQfinish(test_conn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (success == false)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Checks have succeeded, but the open connection to the primary has gone away,
|
||||||
|
* possibly due to a brief outage between monitoring intervals - attempt to
|
||||||
|
* reset it.
|
||||||
|
*/
|
||||||
|
log_notice(_("upstream is available but upstream connection has gone away, resetting"));
|
||||||
|
|
||||||
|
PQfinish(*conn);
|
||||||
|
*conn = PQconnectdb(conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(*conn) == CONNECTION_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (PQstatus(*conn) != CONNECTION_OK)
|
if (PQstatus(*conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_debug("check_upstream_connection(): connection not OK");
|
log_debug("check_upstream_connection(): upstream connection has gone away, resetting");
|
||||||
if (twice)
|
if (twice)
|
||||||
return false;
|
return false;
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
@@ -895,6 +910,7 @@ check_upstream_connection(PGconn **conn, const char *conninfo)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* reconnect */
|
/* reconnect */
|
||||||
|
log_debug("check_upstream_connection(): upstream connection not available, resetting");
|
||||||
PQfinish(*conn);
|
PQfinish(*conn);
|
||||||
*conn = PQconnectdb(conninfo);
|
*conn = PQconnectdb(conninfo);
|
||||||
twice = true;
|
twice = true;
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* repmgrd.h
|
* repmgrd.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,101 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
set -u
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Process parameters passed to script
|
|
||||||
# -----------------------------------
|
|
||||||
#
|
|
||||||
# This assumes the repmgr "event_notification_command" is defined like this:
|
|
||||||
#
|
|
||||||
# event_notification_command='/path/to/bdr-pgbouncer.sh %n %e %s "%c" "%a" >> /tmp/bdr-failover.log 2>&1'
|
|
||||||
#
|
|
||||||
# Adjust as appropriate.
|
|
||||||
|
|
||||||
NODE_ID=$1
|
|
||||||
EVENT_TYPE=$2
|
|
||||||
SUCCESS=$3
|
|
||||||
NEXT_CONNINFO=$4
|
|
||||||
NEXT_NODE_NAME=$5
|
|
||||||
|
|
||||||
if [ "$EVENT_TYPE" != "bdr_failover" ]; then
|
|
||||||
echo "unable to handle event type '$EVENT_TYPE'"
|
|
||||||
exit
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Define database name here
|
|
||||||
# -------------------------
|
|
||||||
#
|
|
||||||
# Note: this assumes the BDR-enabled database has the same name on
|
|
||||||
# both hosts
|
|
||||||
|
|
||||||
BDR_DBNAME=bdr_db
|
|
||||||
|
|
||||||
# Define PgBouncer hosts here
|
|
||||||
# ---------------------------
|
|
||||||
|
|
||||||
PGBOUNCER_HOSTS="host1 host2"
|
|
||||||
PGBOUNCER_PORTS=(6432 6432)
|
|
||||||
PGBOUNCER_DATABASE_INI=(/path/to/pgbouncer.database.ini /path/to/pgbouncer.database.ini)
|
|
||||||
|
|
||||||
|
|
||||||
# Define local host info here
|
|
||||||
# ---------------------------
|
|
||||||
|
|
||||||
THIS_HOST="host1"
|
|
||||||
THIS_PGBOUNCER_PORT="6432"
|
|
||||||
THIS_DB_PORT="5432"
|
|
||||||
|
|
||||||
# Pause all pgbouncer nodes to minimize impact on clients
|
|
||||||
# -------------------------------------------------------
|
|
||||||
|
|
||||||
i=0
|
|
||||||
for HOST in $PGBOUNCER_HOSTS
|
|
||||||
do
|
|
||||||
PORT="${PGBOUNCER_PORTS[$i]}"
|
|
||||||
|
|
||||||
psql -tc "pause" -h $HOST -p $PORT -U postgres pgbouncer
|
|
||||||
|
|
||||||
i=$((i+1))
|
|
||||||
done
|
|
||||||
|
|
||||||
# Copy pgbouncer database ini file to all nodes and restart pgbouncer
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
|
|
||||||
i=0
|
|
||||||
THIS_HOSTPORT="$THIS_HOST$THIS_PGBOUNCER_PORT"
|
|
||||||
PGBOUNCER_DATABASE_INI_NEW="/tmp/pgbouncer.database.ini.new"
|
|
||||||
|
|
||||||
for HOST in $PGBOUNCER_HOSTS
|
|
||||||
do
|
|
||||||
PORT="${PGBOUNCER_PORTS[$i]}"
|
|
||||||
|
|
||||||
# Recreate the pgbouncer config file
|
|
||||||
# ----------------------------------
|
|
||||||
echo -e "[databases]\n" > $PGBOUNCER_DATABASE_INI_NEW
|
|
||||||
|
|
||||||
echo -e "$BDR_DBNAME= $NEXT_CONNINFO application_name=pgbouncer_$PORT" >> $PGBOUNCER_DATABASE_INI_NEW
|
|
||||||
|
|
||||||
# Copy file to host
|
|
||||||
# -----------------
|
|
||||||
CONFIG="${PGBOUNCER_DATABASE_INI[$i]}"
|
|
||||||
|
|
||||||
if [ "$HOST$PORT" != "$THIS_HOSTPORT" ]; then
|
|
||||||
rsync $PGBOUNCER_DATABASE_INI_NEW $HOST:$CONFIG
|
|
||||||
else
|
|
||||||
cp $PGBOUNCER_DATABASE_INI_NEW $CONFIG
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Reload and resume PgBouncer
|
|
||||||
# ---------------------------
|
|
||||||
|
|
||||||
psql -tc "reload" -h $HOST -p $PORT -U postgres pgbouncer
|
|
||||||
psql -tc "resume" -h $HOST -p $PORT -U postgres pgbouncer
|
|
||||||
|
|
||||||
i=$((i+1))
|
|
||||||
done
|
|
||||||
|
|
||||||
|
|
||||||
# Clean up generated file
|
|
||||||
rm $PGBOUNCER_DATABASE_INI_NEW
|
|
||||||
|
|
||||||
echo "Reconfiguration of pgbouncer complete"
|
|
||||||
@@ -17,8 +17,6 @@ SELECT * FROM repmgr.replication_status;
|
|||||||
SELECT * FROM repmgr.show_nodes;
|
SELECT * FROM repmgr.show_nodes;
|
||||||
|
|
||||||
-- functions
|
-- functions
|
||||||
SELECT repmgr.am_bdr_failover_handler(-1);
|
|
||||||
SELECT repmgr.am_bdr_failover_handler(NULL);
|
|
||||||
SELECT repmgr.get_new_primary();
|
SELECT repmgr.get_new_primary();
|
||||||
SELECT repmgr.notify_follow_primary(-1);
|
SELECT repmgr.notify_follow_primary(-1);
|
||||||
SELECT repmgr.notify_follow_primary(NULL);
|
SELECT repmgr.notify_follow_primary(NULL);
|
||||||
@@ -27,4 +25,3 @@ SELECT repmgr.set_local_node_id(-1);
|
|||||||
SELECT repmgr.set_local_node_id(NULL);
|
SELECT repmgr.set_local_node_id(NULL);
|
||||||
SELECT repmgr.standby_get_last_updated();
|
SELECT repmgr.standby_get_last_updated();
|
||||||
SELECT repmgr.standby_set_last_updated();
|
SELECT repmgr.standby_set_last_updated();
|
||||||
SELECT repmgr.unset_bdr_failover_handler();
|
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.c
|
* strutil.c
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* strutil.h
|
* strutil.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Functions which need to be executed on the local system.
|
* Functions which need to be executed on the local system.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* sysutils.h
|
* sysutils.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
2
voting.h
2
voting.h
@@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* voting.h
|
* voting.h
|
||||||
* Copyright (c) 2ndQuadrant, 2010-2019
|
* Copyright (c) 2ndQuadrant, 2010-2020
|
||||||
*
|
*
|
||||||
* This program is free software: you can redistribute it and/or modify
|
* This program is free software: you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
|||||||
Reference in New Issue
Block a user