From 2491b8ae5255bb87a92fb1f87266c1de1c751035 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 27 Sep 2018 16:42:10 +0900 Subject: [PATCH] Add functionality to "pause" repmgrd In some circumstances, e.g. while performing a switchover, it is essential that repmgrd does not take any kind of failover action, as this will put the cluster into an incorrect state. Previously it was necessary to stop repmgrd on all nodes (or at least those nodes which repmgrd would consider as promotion candidates), however this is a cumbersome and potentially risk-prone operation, particularly if the replication cluster contains more than a couple of servers. To prevent this issue from occurring, this patch introduces the ability to "pause" repmgrd on all nodes with a single command ("repmgr daemon pause") which notifies repmgrd not to take any failover action until the node is "unpaused" ("repmgr daemon unpause"). "repmgr daemon status" provides an overview of each node and whether repmgrd is running, and if so whether it is paused. "repmgr standby switchover" has been modified to automatically pause repmgrd while carrying out the switchover. See documentation for further details. 
--- Makefile.in | 4 +- dbutils.c | 130 ++++++++- dbutils.h | 20 ++ doc/filelist.sgml | 4 + doc/repmgr-cluster-show.sgml | 4 +- doc/repmgr-daemon-pause.sgml | 109 ++++++++ doc/repmgr-daemon-status.sgml | 165 ++++++++++++ doc/repmgr-daemon-unpause.sgml | 103 +++++++ doc/repmgr-standby-switchover.sgml | 33 ++- doc/repmgr.sgml | 4 + doc/repmgrd-pausing.sgml | 169 ++++++++++++ doc/switchover.sgml | 49 ++-- errcode.h | 1 + repmgr--4.1.sql | 15 ++ repmgr--4.2.sql | 30 +++ repmgr-action-cluster.c | 41 +-- repmgr-action-daemon.c | 420 +++++++++++++++++++++++++++++ repmgr-action-daemon.h | 28 ++ repmgr-action-standby.c | 256 +++++++++++++++++- repmgr-client-global.h | 14 +- repmgr-client.c | 95 ++++++- repmgr-client.h | 5 + repmgr.c | 260 +++++++++++++++++- repmgr.h | 1 + repmgrd-physical.c | 95 ++++--- repmgrd.c | 8 +- repmgrd.h | 1 + 27 files changed, 1943 insertions(+), 121 deletions(-) create mode 100644 doc/repmgr-daemon-pause.sgml create mode 100644 doc/repmgr-daemon-status.sgml create mode 100644 doc/repmgr-daemon-unpause.sgml create mode 100644 doc/repmgrd-pausing.sgml create mode 100644 repmgr-action-daemon.c create mode 100644 repmgr-action-daemon.h diff --git a/Makefile.in b/Makefile.in index 001605c3..36040829 100644 --- a/Makefile.in +++ b/Makefile.in @@ -17,7 +17,6 @@ DATA = \ repmgr--4.1--4.2.sql \ repmgr--4.2.sql - REGRESS = repmgr_extension # Hacky workaround to install the binaries @@ -43,7 +42,7 @@ $(info Building against PostgreSQL $(MAJORVERSION)) REPMGR_CLIENT_OBJS = repmgr-client.o \ repmgr-action-primary.o repmgr-action-standby.o repmgr-action-witness.o \ - repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o \ + repmgr-action-bdr.o repmgr-action-cluster.o repmgr-action-node.o repmgr-action-daemon.o \ configfile.o log.o strutil.o controldata.o dirutil.o compat.o dbutils.o REPMGRD_OBJS = repmgrd.o repmgrd-physical.o repmgrd-bdr.o configfile.o log.o dbutils.o strutil.o controldata.o compat.o DATE=$(shell date "+%Y-%m-%d") @@ -87,6 +86,7 @@ 
additional-clean: rm -f repmgr-action-bdr.o rm -f repmgr-action-node.o rm -f repmgr-action-cluster.o + rm -f repmgr-action-daemon.o rm -f repmgrd.o rm -f repmgrd-physical.o rm -f repmgrd-bdr.o diff --git a/dbutils.c b/dbutils.c index bc9ba3d2..23a0bf57 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1627,7 +1627,6 @@ repmgrd_set_local_node_id(PGconn *conn, int local_node_id) } - int repmgrd_get_local_node_id(PGconn *conn) { @@ -1686,6 +1685,135 @@ server_in_exclusive_backup_mode(PGconn *conn) } +void +repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile) +{ + PQExpBufferData query; + PGresult *res = NULL; + + log_verbose(LOG_DEBUG, "repmgrd_set_pid(): pid is %i", (int) repmgrd_pid); + + initPQExpBuffer(&query); + + appendPQExpBuffer(&query, + "SELECT repmgr.set_repmgrd_pid(%i, '%s')", + (int) repmgrd_pid, pidfile); + + res = PQexec(conn, query.data); + termPQExpBuffer(&query); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute \"SELECT repmgr.set_repmgrd_pid()\"")); + log_detail("%s", PQerrorMessage(conn)); + } + + PQclear(res); + + return; +} + + +pid_t +repmgrd_get_pid(PGconn *conn) +{ + PGresult *res = NULL; + pid_t repmgrd_pid = UNKNOWN_PID; + + res = PQexec(conn, "SELECT repmgr.get_repmgrd_pid()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute \"SELECT repmgr.get_repmgrd_pid()\"")); + log_detail("%s", PQerrorMessage(conn)); + } + else if (!PQgetisnull(res, 0, 0)) + { + repmgrd_pid = atoi(PQgetvalue(res, 0, 0)); + } + + PQclear(res); + + return repmgrd_pid; +} + + +bool +repmgrd_is_running(PGconn *conn) +{ + PGresult *res = NULL; + bool is_running = false; + + res = PQexec(conn, "SELECT repmgr.repmgrd_is_running()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_running()\"")); + log_detail("%s", PQerrorMessage(conn)); + } + else if (!PQgetisnull(res, 0, 0)) + { + is_running = atobool(PQgetvalue(res, 0, 0)); + } + 
+ PQclear(res); + + return is_running; +} + + +bool +repmgrd_is_paused(PGconn *conn) +{ + PGresult *res = NULL; + bool is_paused = false; + + res = PQexec(conn, "SELECT repmgr.repmgrd_is_paused()"); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute \"SELECT repmgr.repmgrd_is_paused()\"")); + log_detail("%s", PQerrorMessage(conn)); + } + else if (!PQgetisnull(res, 0, 0)) + { + is_paused = atobool(PQgetvalue(res, 0, 0)); + } + + PQclear(res); + + return is_paused; +} + + +bool +repmgrd_pause(PGconn *conn, bool pause) +{ + PQExpBufferData query; + PGresult *res = NULL; + bool success = true; + + initPQExpBuffer(&query); + + appendPQExpBuffer(&query, + "SELECT repmgr.repmgrd_pause(%s)", + pause == true ? "TRUE" : "FALSE"); + res = PQexec(conn, query.data); + termPQExpBuffer(&query); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + log_error(_("unable to execute \"SELECT repmgr.repmgrd_pause()\"")); + log_detail("%s", PQerrorMessage(conn)); + + success = false; + } + + PQclear(res); + + return success; +} + /* ================ */ /* result functions */ /* ================ */ diff --git a/dbutils.h b/dbutils.h index da25d677..875fa42d 100644 --- a/dbutils.h +++ b/dbutils.h @@ -327,6 +327,21 @@ typedef struct UNKNOWN_TIMELINE_ID, \ InvalidXLogRecPtr \ } + + +typedef struct RepmgrdInfo { + int node_id; + int pid; + char pid_text[MAXLEN]; + char pid_file[MAXLEN]; + bool pg_running; + char pg_running_text[MAXLEN]; + bool running; + char repmgrd_running[MAXLEN]; + bool paused; +} RepmgrdInfo; + + /* global variables */ extern int server_version_num; @@ -399,6 +414,11 @@ bool identify_system(PGconn *repl_conn, t_system_identification *identification bool repmgrd_set_local_node_id(PGconn *conn, int local_node_id); int repmgrd_get_local_node_id(PGconn *conn); BackupState server_in_exclusive_backup_mode(PGconn *conn); +void repmgrd_set_pid(PGconn *conn, pid_t repmgrd_pid, const char *pidfile); +pid_t repmgrd_get_pid(PGconn *conn); +bool 
repmgrd_is_running(PGconn *conn); +bool repmgrd_is_paused(PGconn *conn); +bool repmgrd_pause(PGconn *conn, bool pause); /* extension functions */ ExtensionStatus get_repmgr_extension_status(PGconn *conn); diff --git a/doc/filelist.sgml b/doc/filelist.sgml index 7a1faa71..1bb2e7f9 100644 --- a/doc/filelist.sgml +++ b/doc/filelist.sgml @@ -58,6 +58,7 @@ + @@ -78,6 +79,9 @@ + + + diff --git a/doc/repmgr-cluster-show.sgml b/doc/repmgr-cluster-show.sgml index a096ff12..944d866c 100644 --- a/doc/repmgr-cluster-show.sgml +++ b/doc/repmgr-cluster-show.sgml @@ -90,7 +90,7 @@ repmgr cluster show accepts an optional parameter --csv, which outputs the replication cluster's status in a simple CSV format, suitable for - parsing by scripts: + parsing by scripts, e.g.: $ repmgr -f /etc/repmgr.conf cluster show --csv 1,-1,-1 @@ -165,7 +165,7 @@ See also - , + , , diff --git a/doc/repmgr-daemon-pause.sgml b/doc/repmgr-daemon-pause.sgml new file mode 100644 index 00000000..c2845611 --- /dev/null +++ b/doc/repmgr-daemon-pause.sgml @@ -0,0 +1,109 @@ + + + repmgr daemon pause + + + + repmgr daemon pause + + + + repmgr daemon pause + Instruct all repmgrd instances in the replication cluster to pause failover operations + + + + Description + + This command can be run on any active node in the replication cluster to instruct all + running repmgrd instances to "pause" themselves, i.e. take no + action (such as promoting themselves or following a new primary) if a failover event is detected. + + + This functionality is useful for performing maintenance operations, such as switchovers + or upgrades, which might otherwise trigger a failover if repmgrd + is running normally. + + + + It's important to wait a few seconds after restarting PostgreSQL on any node before running + repmgr daemon pause, as the repmgrd instance + on the restarted node will take a second or two before it has updated its status. 
+ + + + will instruct all previously paused repmgrd + instances to resume normal failover operation. + + + + + Execution + + repmgr daemon pause can be executed on any active node in the + replication cluster. A valid repmgr.conf file is required. + It will have no effect on previously paused nodes. + + + + + Example + + +$ repmgr -f /etc/repmgr.conf daemon pause +NOTICE: node 1 (node1) paused +NOTICE: node 2 (node2) paused +NOTICE: node 3 (node3) paused + + + + + Options + + + + + + Check if nodes are reachable but don't pause repmgrd. + + + + + + + + Exit codes + + Following exit codes can be emitted by repmgr daemon pause: + + + + + + + + repmgrd could be paused on all nodes. + + + + + + + + + repmgrd could not be paused on one or more nodes. + + + + + + + + + See also + + , + + + + diff --git a/doc/repmgr-daemon-status.sgml b/doc/repmgr-daemon-status.sgml new file mode 100644 index 00000000..1d2dc765 --- /dev/null +++ b/doc/repmgr-daemon-status.sgml @@ -0,0 +1,165 @@ + + + repmgr daemon status + + + + repmgr daemon status + + + + repmgr daemon status + display information about the status of repmgrd on each node in the cluster + + + + Description + + This command provides an overview of all active nodes in the cluster and the state + of each node's repmgrd instance. It can be used to check + the result of and + operations. + + + + + Execution + + repmgr daemon status can be executed on any active node in the + replication cluster. A valid repmgr.conf file is required. + + + + + After restarting PostgreSQL on any node, the repmgrd instance + will take a second or two before it is able to update its status. Until then, + repmgrd will be shown as not running. + + + + + + + Examples + + repmgrd running normally on all nodes: + $ repmgr -f /etc/repmgr.conf daemon status + ID | Name | Role | Status | repmgrd | PID | Paused? 
+----+-------+---------+---------+---------+------+--------- + 1 | node1 | primary | running | running | 7851 | no + 2 | node2 | standby | running | running | 7889 | no + 3 | node3 | standby | running | running | 7918 | no + + + + repmgrd paused on all nodes (using ): + $ repmgr -f /etc/repmgr.conf daemon status + ID | Name | Role | Status | repmgrd | PID | Paused? +----+-------+---------+---------+---------+------+--------- + 1 | node1 | primary | running | running | 7851 | yes + 2 | node2 | standby | running | running | 7889 | yes + 3 | node3 | standby | running | running | 7918 | yes + + + + repmgrd not running on one node: + $ repmgr -f /etc/repmgr.conf daemon status + ID | Name | Role | Status | repmgrd | PID | Paused? +----+-------+---------+---------+-------------+------+--------- + 1 | node1 | primary | running | running | 7851 | yes + 2 | node2 | standby | running | not running | n/a | n/a + 3 | node3 | standby | running | running | 7918 | yes + + + + + + Options + + + + + + + + repmgr daemon status accepts an optional parameter --csv, which + outputs the replication cluster's status in a simple CSV format, suitable for + parsing by scripts, e.g.: + + $ repmgr -f /etc/repmgr.conf daemon status --csv + 1,node1,primary,1,1,10204,1 + 2,node2,standby,1,0,-1,1 + 3,node3,standby,1,1,10225,1 + + + The columns have following meanings: + + + + node ID + + + + + + node name + + + + + + node type (primary or standby) + + + + + + PostgreSQL server running + + + + + + repmgrd running (1 = running, 0 = not running) + + + + + + repmgrd PID (-1 if not running) + + + + + + repmgrd paused (1 = paused, 0 = not paused) + + + + + + + + + + + + + Display the full text of any database connection error messages + + + + + + + + + + + + See also + + , , + + + diff --git a/doc/repmgr-daemon-unpause.sgml b/doc/repmgr-daemon-unpause.sgml new file mode 100644 index 00000000..9e640313 --- /dev/null +++ b/doc/repmgr-daemon-unpause.sgml @@ -0,0 +1,103 @@ + + + repmgr daemon unpause + + + 
+ + repmgr daemon unpause + + + + repmgr daemon unpause + Instruct all repmgrd instances in the replication cluster to resume failover operations + + + + Description + + This command can be run on any active node in the replication cluster to instruct all + running repmgrd instances to "unpause" + (following a previous execution of ) + and resume normal failover/monitoring operation. + + + + + It's important to wait a few seconds after restarting PostgreSQL on any node before running + repmgr daemon unpause, as the repmgrd instance + on the restarted node will take a second or two before it has updated its status. + + + + + + + Execution + + repmgr daemon unpause can be executed on any active node in the + replication cluster. A valid repmgr.conf file is required. + It will have no effect on nodes which are not already paused. + + + + + Example + + +$ repmgr -f /etc/repmgr.conf daemon unpause +NOTICE: node 1 (node1) unpaused +NOTICE: node 2 (node2) unpaused +NOTICE: node 3 (node3) unpaused + + + + + Options + + + + + + Check if nodes are reachable but don't unpause repmgrd. + + + + + + + + Exit codes + + Following exit codes can be emitted by repmgr daemon unpause: + + + + + + + + repmgrd could be unpaused on all nodes. + + + + + + + + + repmgrd could not be unpaused on one or more nodes. + + + + + + + + + See also + + , + + + + diff --git a/doc/repmgr-standby-switchover.sgml b/doc/repmgr-standby-switchover.sgml index cbd5d7a1..d8cf6d70 100644 --- a/doc/repmgr-standby-switchover.sgml +++ b/doc/repmgr-standby-switchover.sgml @@ -35,6 +35,10 @@ &repmgr; will attempt to check for potential issues but cannot guarantee a successful switchover. + + &repmgr; will refuse to perform the switchover if an exclusive backup is running on + the current primary. + For more details on performing a switchover, including preparation and configuration, @@ -43,11 +47,14 @@ - repmgrd should not be active on any nodes while a switchover is being - executed. 
This restriction may be lifted in a later version. + From repmgr 4.2, &repmgr; will instruct any running + repmgrd instances to pause operations while the switchover + is being carried out, to prevent repmgrd from + unintentionally promoting a node. For more details, see . - &repmgr; will not perform the switchover if an exclusive backup is running on the current primary. + Users of &repmgr; versions prior to 4.2 should ensure that repmgrd + is not running on any nodes while a switchover is being executed. @@ -61,8 +68,9 @@ - Promote standby to primary, even if it is behind original primary - (original primary will be shut down in any case). + Promote standby to primary, even if it is behind or has diverged + from the original primary. The original primary will be shut down in any case, + and will need to be manually reintegrated into the replication cluster. @@ -122,6 +130,21 @@ + + + + + Don't pause repmgrd while executing a switchover. + + + This option should not be used unless you take steps by other means + to ensure repmgrd is paused or not + running on all nodes. 
+ + + + + diff --git a/doc/repmgr.sgml b/doc/repmgr.sgml index 90ef1bc4..68903d2c 100644 --- a/doc/repmgr.sgml +++ b/doc/repmgr.sgml @@ -86,6 +86,7 @@ &repmgrd-cascading-replication; &repmgrd-network-split; &repmgrd-witness-server; + &repmgrd-pausing; &repmgrd-degraded-monitoring; &repmgrd-monitoring; &repmgrd-bdr; @@ -112,6 +113,9 @@ &repmgr-cluster-crosscheck; &repmgr-cluster-event; &repmgr-cluster-cleanup; + &repmgr-daemon-status; + &repmgr-daemon-pause; + &repmgr-daemon-unpause; &appendix-release-notes; diff --git a/doc/repmgrd-pausing.sgml b/doc/repmgrd-pausing.sgml new file mode 100644 index 00000000..ccef2b61 --- /dev/null +++ b/doc/repmgrd-pausing.sgml @@ -0,0 +1,169 @@ + + + + repmgrd + pausing + + + + pausing repmgrd + + + Pausing repmgrd + + + In normal operation, repmgrd monitors the state of the + PostgreSQL node it is running on, and will take appropriate action if problems + are detected, e.g. (if so configured) promote the node to primary, if the existing + primary has been determined as failed. + + + + However, repmgrd is unable to distinguish between + planned outages (such as performing a switchover + or upgrading a server), and an actual server outage. In versions prior to &repmgr; 4.2 + it was necessary to stop repmgrd on all nodes (or at least + on all nodes where repmgrd is + configured for automatic failover) + to prevent repmgrd from making changes to the + replication cluster. + + + From &repmgr; 4.2, repmgrd + can now be "paused", i.e. instructed not to take any action such as performing a failover. + This can be done from any node in the cluster, removing the need to stop/restart + each repmgrd individually. + + + + Prerequisites for pausing <application>repmgrd</application> + + In order to be able to pause/unpause repmgrd, following + prerequisites must be met: + + + + &repmgr; 4.2 or later must be installed on all nodes. + + + + The same major &repmgr; version (e.g. 
4.2) must be installed on all nodes (and preferably the same minor version). + + + + + PostgreSQL on all nodes must be accessible from the node where the + pause/unpause operation is executed, using the + conninfo string shown by repmgr cluster show. + + + + + + + These conditions are required for normal &repmgr; operation in any case. + + + + + + + Pausing/unpausing <application>repmgrd</application> + + To pause repmgrd, execute repmgr daemon pause, e.g.: + +$ repmgr -f /etc/repmgr.conf daemon pause +NOTICE: node 1 (node1) paused +NOTICE: node 2 (node2) paused +NOTICE: node 3 (node3) paused + + + The state of repmgrd on each node can be checked with + repmgr daemon status, e.g.: + $ repmgr -f /etc/repmgr.conf daemon status + ID | Name | Role | Status | repmgrd | PID | Paused? +----+-------+---------+---------+---------+------+--------- + 1 | node1 | primary | running | running | 7851 | yes + 2 | node2 | standby | running | running | 7889 | yes + 3 | node3 | standby | running | running | 7918 | yes + + + + + If executing a switchover with repmgr standby switchover, + &repmgr; will automatically pause/unpause repmgrd as part of the switchover process. + + + + + If the primary (in this example, node1) is stopped, repmgrd + running on one of the standbys (here: node2) will react like this: + +[2018-09-20 12:22:21] [WARNING] unable to connect to upstream node "node1" (node ID: 1) +[2018-09-20 12:22:21] [INFO] checking state of node 1, 1 of 5 attempts +[2018-09-20 12:22:21] [INFO] sleeping 1 seconds until next reconnection attempt +... 
+[2018-09-20 12:22:24] [INFO] sleeping 1 seconds until next reconnection attempt +[2018-09-20 12:22:25] [INFO] checking state of node 1, 5 of 5 attempts +[2018-09-20 12:22:25] [WARNING] unable to reconnect to node 1 after 5 attempts +[2018-09-20 12:22:25] [NOTICE] node is paused +[2018-09-20 12:22:33] [INFO] node "node2" (node ID: 2) monitoring upstream node "node1" (node ID: 1) in degraded state +[2018-09-20 12:22:33] [DETAIL] repmgrd paused by administrator +[2018-09-20 12:22:33] [HINT] execute "repmgr daemon unpause" to resume normal failover mode + + + If the primary becomes available again (e.g. following a software upgrade), repmgrd + will automatically reconnect, e.g.: + +[2018-09-20 13:12:41] [NOTICE] reconnected to upstream node 1 after 8 seconds, resuming monitoring + + + + To unpause repmgrd, execute repmgr daemon unpause, e.g.: + +$ repmgr -f /etc/repmgr.conf daemon unpause +NOTICE: node 1 (node1) unpaused +NOTICE: node 2 (node2) unpaused +NOTICE: node 3 (node3) unpaused + + + + + If the previous primary is no longer accessible when repmgrd + is unpaused, no failover action will be taken. Instead, a new primary must be manually promoted using + repmgr standby promote, + and any standbys attached to the new primary with + repmgr standby follow. + + + This is to prevent repmgr daemon unpause + resulting in the automatic promotion of a new primary, which may be a problem particularly + in larger clusters, where repmgrd could select a different promotion + candidate to the one intended by the administrator. + + + + + Details on the <application>repmgrd</application> pausing mechanism + + + The pause state of each node will be stored over a PostgreSQL restart. + + + + repmgr daemon pause and + repmgr daemon unpause can be + executed even if repmgrd is not running; in this case, + repmgrd will start up in whichever pause state has been set. + + + + repmgr daemon pause and + repmgr daemon unpause + do not stop/start repmgrd. 
+ + + + + + diff --git a/doc/switchover.sgml b/doc/switchover.sgml index e3999112..56683c93 100644 --- a/doc/switchover.sgml +++ b/doc/switchover.sgml @@ -19,9 +19,10 @@ repmgr standby switchover differs from other &repmgr; - actions in that it also performs actions on another server (the demotion - candidate), which means passwordless SSH access is required to that server - from the one where repmgr standby switchover is executed. + actions in that it also performs actions on other servers (the demotion + candidate, and optionally any other servers which are to follow the new primary), + which means passwordless SSH access is required to those servers from the one where + repmgr standby switchover is executed. @@ -153,12 +154,18 @@ manually with repmgr node check --archive-ready. - - - Ensure that repmgrd is *not* running anywhere to prevent it unintentionally - promoting a node. This restriction will be removed in a future &repmgr; version. - - + + + From repmgr 4.2, &repmgr; will instruct any running + repmgrd instances to pause operations while the switchover + is being carried out, to prevent repmgrd from + unintentionally promoting a node. For more details, see . + + + Users of &repmgr; versions prior to 4.2 should ensure that repmgrd + is not running on any nodes while a switchover is being executed. + + @@ -303,7 +310,21 @@ 2 | node2 | primary | * running | | default | host=node2 dbname=repmgr user=repmgr + + If repmgrd is in use, it's worth double-checking that + all nodes are unpaused by executing repmgr-daemon-status. + + + + + Users of &repmgr; versions prior to 4.2 will need to manually restart repmgrd + on all nodes after the switchover is completed. + + + + + switchover @@ -329,18 +350,8 @@ for details. - - - repmgrd should not be running with setting failover=automatic - in repmgr.conf when a switchover is carried out, otherwise the - repmgrd daemon may try and promote a standby by itself. 
- - - - We hope to remove some of these restrictions in future versions of &repmgr;. - diff --git a/errcode.h b/errcode.h index b7d4c688..a7a4f770 100644 --- a/errcode.h +++ b/errcode.h @@ -47,5 +47,6 @@ #define ERR_FOLLOW_FAIL 23 #define ERR_REJOIN_FAIL 24 #define ERR_NODE_STATUS 25 +#define ERR_REPMGRD_PAUSE 26 #endif /* _ERRCODE_H_ */ diff --git a/repmgr--4.1.sql b/repmgr--4.1.sql index f012853f..d73d988b 100644 --- a/repmgr--4.1.sql +++ b/repmgr--4.1.sql @@ -145,6 +145,21 @@ CREATE FUNCTION unset_bdr_failover_handler() AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler' LANGUAGE C STRICT; +CREATE FUNCTION get_repmgrd_pid() + RETURNS INT + AS 'MODULE_PATHNAME', 'get_repmgrd_pid' + LANGUAGE C STRICT; + +CREATE FUNCTION get_repmgrd_pidfile() + RETURNS TEXT + AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile' + LANGUAGE C STRICT; + +CREATE FUNCTION set_repmgrd_pid(INT, TEXT) + RETURNS VOID + AS 'MODULE_PATHNAME', 'set_repmgrd_pid' + LANGUAGE C STRICT; + CREATE VIEW repmgr.replication_status AS SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name, diff --git a/repmgr--4.2.sql b/repmgr--4.2.sql index f012853f..c0567ca3 100644 --- a/repmgr--4.2.sql +++ b/repmgr--4.2.sql @@ -145,6 +145,36 @@ CREATE FUNCTION unset_bdr_failover_handler() AS 'MODULE_PATHNAME', 'unset_bdr_failover_handler' LANGUAGE C STRICT; +CREATE FUNCTION get_repmgrd_pid() + RETURNS INT + AS 'MODULE_PATHNAME', 'get_repmgrd_pid' + LANGUAGE C STRICT; + +CREATE FUNCTION get_repmgrd_pidfile() + RETURNS TEXT + AS 'MODULE_PATHNAME', 'get_repmgrd_pidfile' + LANGUAGE C STRICT; + +CREATE FUNCTION set_repmgrd_pid(INT, TEXT) + RETURNS VOID + AS 'MODULE_PATHNAME', 'set_repmgrd_pid' + LANGUAGE C STRICT; + +CREATE FUNCTION repmgrd_is_running() + RETURNS BOOL + AS 'MODULE_PATHNAME', 'repmgrd_is_running' + LANGUAGE C STRICT; + +CREATE FUNCTION repmgrd_pause(BOOL) + RETURNS VOID + AS 'MODULE_PATHNAME', 'repmgrd_pause' + LANGUAGE C STRICT; + +CREATE FUNCTION repmgrd_is_paused() + RETURNS BOOL + AS 
'MODULE_PATHNAME', 'repmgrd_is_paused' + LANGUAGE C STRICT; + CREATE VIEW repmgr.replication_status AS SELECT m.primary_node_id, m.standby_node_id, n.node_name AS standby_name, diff --git a/repmgr-action-cluster.c b/repmgr-action-cluster.c index 54a771d1..b41229f0 100644 --- a/repmgr-action-cluster.c +++ b/repmgr-action-cluster.c @@ -26,7 +26,6 @@ #define SHOW_HEADER_COUNT 7 - typedef enum { SHOW_ID = 0, @@ -51,14 +50,6 @@ typedef enum } EventHeader; - -struct ColHeader -{ - char title[MAXLEN]; - int max_length; - int cur_length; -}; - struct ColHeader headers_show[SHOW_HEADER_COUNT]; struct ColHeader headers_event[EVENT_HEADER_COUNT]; @@ -159,7 +150,7 @@ do_cluster_show(void) else { item_list_append_format(&warnings, - "unable to connect to node \"%s\" (ID: %i)", + "unable to connect to node \"%s\" (ID: %i)", cell->node_info->node_name, cell->node_info->node_id); } } @@ -364,36 +355,10 @@ do_cluster_show(void) } + /* Print column header row (text mode only) */ if (runtime_options.output_mode == OM_TEXT) { - for (i = 0; i < SHOW_HEADER_COUNT; i++) - { - if (i == 0) - printf(" "); - else - printf(" | "); - - printf("%-*s", - headers_show[i].max_length, - headers_show[i].title); - } - printf("\n"); - printf("-"); - - for (i = 0; i < SHOW_HEADER_COUNT; i++) - { - int j; - - for (j = 0; j < headers_show[i].max_length; j++) - printf("-"); - - if (i < (SHOW_HEADER_COUNT - 1)) - printf("-+-"); - else - printf("-"); - } - - printf("\n"); + print_status_header(SHOW_HEADER_COUNT, headers_show); } for (cell = nodes.head; cell; cell = cell->next) diff --git a/repmgr-action-daemon.c b/repmgr-action-daemon.c new file mode 100644 index 00000000..a6351df0 --- /dev/null +++ b/repmgr-action-daemon.c @@ -0,0 +1,420 @@ +/* + * repmgr-action-daemon.c + * + * Implements repmgrd actions for the repmgr command line utility + * Copyright (c) 2ndQuadrant, 2010-2018 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public 
License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "repmgr.h" + +#include "repmgr-client-global.h" +#include "repmgr-action-daemon.h" + + + +/* + * Possibly also show: + * - repmgrd start time? + * - repmgrd mode + * - priority + * - whether promotion candidate (due to zero priority/different location) + */ + +typedef enum +{ + STATUS_ID = 0, + STATUS_NAME, + STATUS_ROLE, + STATUS_PG, + STATUS_RUNNING, + STATUS_PID, + STATUS_PAUSED +} StatusHeader; + +#define STATUS_HEADER_COUNT 7 + +struct ColHeader headers_status[STATUS_HEADER_COUNT]; + +static void fetch_node_records(PGconn *conn, NodeInfoList *node_list); +static void _do_repmgr_pause(bool pause); + + +void +do_daemon_status(void) +{ + PGconn *conn = NULL; + NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; + NodeInfoListCell *cell = NULL; + int i; + RepmgrdInfo **repmgrd_info; + ItemList warnings = {NULL, NULL}; + + /* Connect to local database to obtain cluster connection data */ + log_verbose(LOG_INFO, _("connecting to database")); + + if (strlen(config_file_options.conninfo)) + conn = establish_db_connection(config_file_options.conninfo, true); + else + conn = establish_db_connection_by_params(&source_conninfo, true); + + fetch_node_records(conn, &nodes); + + repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count); + + if (repmgrd_info == NULL) + { + log_error(_("unable to allocate memory")); + exit(ERR_OUT_OF_MEMORY); + } + + strncpy(headers_status[STATUS_ID].title, _("ID"), MAXLEN); + 
strncpy(headers_status[STATUS_NAME].title, _("Name"), MAXLEN); + strncpy(headers_status[STATUS_ROLE].title, _("Role"), MAXLEN); + strncpy(headers_status[STATUS_PG].title, _("Status"), MAXLEN); + strncpy(headers_status[STATUS_RUNNING].title, _("repmgrd"), MAXLEN); + strncpy(headers_status[STATUS_PID].title, _("PID"), MAXLEN); + strncpy(headers_status[STATUS_PAUSED].title, _("Paused?"), MAXLEN); + + for (i = 0; i < STATUS_HEADER_COUNT; i++) + { + headers_status[i].max_length = strlen(headers_status[i].title); + } + + i = 0; + + for (cell = nodes.head; cell; cell = cell->next) + { + int j; + + repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo)); + repmgrd_info[i]->node_id = cell->node_info->node_id; + repmgrd_info[i]->pid = UNKNOWN_PID; + repmgrd_info[i]->paused = false; + repmgrd_info[i]->running = false; + repmgrd_info[i]->pg_running = true; + + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); + + if (PQstatus(cell->node_info->conn) != CONNECTION_OK) + { + if (runtime_options.verbose) + { + char error[MAXLEN]; + + strncpy(error, PQerrorMessage(cell->node_info->conn), MAXLEN); + + item_list_append_format(&warnings, + "when attempting to connect to node \"%s\" (ID: %i), following error encountered :\n\"%s\"", + cell->node_info->node_name, cell->node_info->node_id, trim(error)); + } + else + { + item_list_append_format(&warnings, + "unable to connect to node \"%s\" (ID: %i)", + cell->node_info->node_name, cell->node_info->node_id); + } + + repmgrd_info[i]->pg_running = false; + maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("not running")); + maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("n/a")); + maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a")); + } + else + { + maxlen_snprintf(repmgrd_info[i]->pg_running_text, "%s", _("running")); + + repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn); + + repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn); + + if (repmgrd_info[i]->running 
== true) + { + maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("running")); + } + else + { + maxlen_snprintf(repmgrd_info[i]->repmgrd_running, "%s", _("not running")); + } + + if (repmgrd_info[i]->pid == UNKNOWN_PID) + { + maxlen_snprintf(repmgrd_info[i]->pid_text, "%s", _("n/a")); + } + else + { + maxlen_snprintf(repmgrd_info[i]->pid_text, "%i", repmgrd_info[i]->pid); + } + + repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn); + + PQfinish(cell->node_info->conn); + } + + + headers_status[STATUS_NAME].cur_length = strlen(cell->node_info->node_name); + headers_status[STATUS_ROLE].cur_length = strlen(get_node_type_string(cell->node_info->type)); + headers_status[STATUS_PID].cur_length = strlen(repmgrd_info[i]->pid_text); + headers_status[STATUS_RUNNING].cur_length = strlen(repmgrd_info[i]->repmgrd_running); + headers_status[STATUS_PG].cur_length = strlen(repmgrd_info[i]->pg_running_text); + + for (j = 0; j < STATUS_HEADER_COUNT; j++) + { + if (headers_status[j].cur_length > headers_status[j].max_length) + { + headers_status[j].max_length = headers_status[j].cur_length; + } + } + + i++; + } + + /* Print column header row (text mode only) */ + if (runtime_options.output_mode == OM_TEXT) + { + print_status_header(STATUS_HEADER_COUNT, headers_status); + } + + i = 0; + + for (cell = nodes.head; cell; cell = cell->next) + { + if (runtime_options.output_mode == OM_CSV) + { + printf("%i,%s,%s,%i,%i,%i,%i\n", + cell->node_info->node_id, + cell->node_info->node_name, + get_node_type_string(cell->node_info->type), + repmgrd_info[i]->pg_running ? 1 : 0, + repmgrd_info[i]->running ? 1 : 0, + repmgrd_info[i]->pid, + repmgrd_info[i]->paused ? 
1 : 0); + } + else + { + printf(" %-*i ", headers_status[STATUS_ID].max_length, cell->node_info->node_id); + printf("| %-*s ", headers_status[STATUS_NAME].max_length, cell->node_info->node_name); + printf("| %-*s ", headers_status[STATUS_ROLE].max_length, get_node_type_string(cell->node_info->type)); + + printf("| %-*s ", headers_status[STATUS_PG].max_length, repmgrd_info[i]->pg_running_text); + printf("| %-*s ", headers_status[STATUS_RUNNING].max_length, repmgrd_info[i]->repmgrd_running); + printf("| %-*s ", headers_status[STATUS_PID].max_length, repmgrd_info[i]->pid_text); + + if (repmgrd_info[i]->pid == UNKNOWN_PID) + printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, "n/a"); + else + printf("| %-*s ", headers_status[STATUS_PAUSED].max_length, repmgrd_info[i]->paused ? "yes" : "no"); + + printf("\n"); + } + + free(repmgrd_info[i]); + i++; + } + + free(repmgrd_info); + + /* emit any warnings */ + + if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode != OM_CSV) + { + ItemListCell *cell = NULL; + + printf(_("\nWARNING: following issues were detected\n")); + for (cell = warnings.head; cell; cell = cell->next) + { + printf(_(" - %s\n"), cell->string); + } + + if (runtime_options.verbose == false) + { + log_hint(_("execute with --verbose option to see connection error messages")); + } + } +} + +void +do_daemon_pause(void) +{ + _do_repmgr_pause(true); +} + +void +do_daemon_unpause(void) +{ + _do_repmgr_pause(false); +} + + +static void +_do_repmgr_pause(bool pause) +{ + PGconn *conn = NULL; + NodeInfoList nodes = T_NODE_INFO_LIST_INITIALIZER; + NodeInfoListCell *cell = NULL; + RepmgrdInfo **repmgrd_info; + int i; + int error_nodes = 0; + + repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * nodes.node_count); + + if (repmgrd_info == NULL) + { + log_error(_("unable to allocate memory")); + exit(ERR_OUT_OF_MEMORY); + } + + /* Connect to local database to obtain cluster connection data */ + 
log_verbose(LOG_INFO, _("connecting to database")); + + if (strlen(config_file_options.conninfo)) + conn = establish_db_connection(config_file_options.conninfo, true); + else + conn = establish_db_connection_by_params(&source_conninfo, true); + + fetch_node_records(conn, &nodes); + + i = 0; + + for (cell = nodes.head; cell; cell = cell->next) + { + repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo)); + repmgrd_info[i]->node_id = cell->node_info->node_id; + + log_verbose(LOG_DEBUG, "pausing node %i (%s)", + cell->node_info->node_id, + cell->node_info->node_name); + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); + + if (PQstatus(cell->node_info->conn) != CONNECTION_OK) + { + log_warning(_("unable to connect to node %i"), + cell->node_info->node_id); + error_nodes++; + } + else + { + if (runtime_options.dry_run == true) + { + if (pause == true) + { + log_info(_("would pause node %i (%s) "), + cell->node_info->node_id, + cell->node_info->node_name); + } + else + { + log_info(_("would unpause node %i (%s) "), + cell->node_info->node_id, + cell->node_info->node_name); + } + } + else + { + bool success = repmgrd_pause(cell->node_info->conn, pause); + + if (success == false) + error_nodes++; + + log_notice(_("node %i (%s) %s"), + cell->node_info->node_id, + cell->node_info->node_name, + success == true + ? pause == true ? "paused" : "unpaused" + : pause == true ? 
"not paused" : "not unpaused"); + } + PQfinish(cell->node_info->conn); + } + i++; + } + + if (error_nodes > 0) + { + if (pause == true) + { + log_error(_("unable to pause %i node(s)"), error_nodes); + } + else + { + log_error(_("unable to unpause %i node(s)"), error_nodes); + } + + log_hint(_("execute \"repmgr daemon status\" to view current status")); + + exit(ERR_REPMGRD_PAUSE); + } + + exit(SUCCESS); +} + + + +void +fetch_node_records(PGconn *conn, NodeInfoList *node_list) +{ + bool success = get_all_node_records(conn, node_list); + + if (success == false) + { + /* get_all_node_records() will display any error message */ + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + if (node_list->node_count == 0) + { + log_error(_("no node records were found")); + log_hint(_("ensure at least one node is registered")); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } +} + + +void do_daemon_help(void) +{ + print_help_header(); + + printf(_("Usage:\n")); + printf(_(" %s [OPTIONS] daemon status\n"), progname()); + printf(_(" %s [OPTIONS] daemon pause\n"), progname()); + printf(_(" %s [OPTIONS] daemon unpause\n"), progname()); + puts(""); + + printf(_("DAEMON STATUS\n")); + puts(""); + printf(_(" \"daemon status\" shows the status of repmgrd on each node in the cluster\n")); + puts(""); + printf(_(" --csv emit output as CSV\n")); + printf(_(" --verbose show text of database connection error messages\n")); + puts(""); + + printf(_("DAEMON PAUSE\n")); + puts(""); + printf(_(" \"daemon pause\" instructs repmgrd on each node to pause failover detection\n")); + puts(""); + printf(_(" --dry-run check if nodes are reachable but don't pause repmgrd\n")); + puts(""); + + printf(_("DAEMON PAUSE\n")); + puts(""); + printf(_(" \"daemon unpause\" instructs repmgrd on each node to resume failover detection\n")); + puts(""); + printf(_(" --dry-run check if nodes are reachable but don't unpause repmgrd\n")); + puts(""); + + + puts(""); +} diff --git a/repmgr-action-daemon.h 
b/repmgr-action-daemon.h new file mode 100644 index 00000000..026feac0 --- /dev/null +++ b/repmgr-action-daemon.h @@ -0,0 +1,28 @@ +/* + * repmgr-action-daemon.h + * Copyright (c) 2ndQuadrant, 2010-2018 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _REPMGR_ACTION_DAEMON_H_ +#define _REPMGR_ACTION_DAEMON_H_ + + +extern void do_daemon_status(void); +extern void do_daemon_pause(void); +extern void do_daemon_unpause(void); + +extern void do_daemon_help(void); +#endif diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 167be896..47c69f43 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2788,15 +2788,13 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor /* * Perform a switchover by: + * * - stopping current primary node * - promoting this standby node to primary - * - forcing previous primary node to follow this node + * - forcing the previous primary node to follow this node * - * Caveat: - * - repmgrd must not be running, otherwise it may - * attempt a failover - * (TODO: find some way of notifying repmgrd of planned - * activity like this) + * Where running and not already paused, repmgrd will be paused (and + * subsequently unpaused), unless --repmgrd-no-pause provided. 
* * TODO: * - make connection test timeouts/intervals configurable (see below) @@ -2854,6 +2852,11 @@ do_standby_switchover(void) t_event_info event_info = T_EVENT_INFO_INITIALIZER; + /* used for handling repmgrd pause/unpause */ + NodeInfoList all_nodes = T_NODE_INFO_LIST_INITIALIZER; + RepmgrdInfo **repmgrd_info = NULL; + int repmgrd_running_count = 0; + /* * SANITY CHECKS * @@ -2924,7 +2927,7 @@ do_standby_switchover(void) if (record_status != RECORD_FOUND) { - log_error(_("unable to retrieve node record for node %i"), + log_error(_("unable to retrieve node record for currentr primary (node %i)"), remote_node_id); PQfinish(local_conn); @@ -2980,6 +2983,7 @@ do_standby_switchover(void) { min_required_free_slots++; } + /* * If --force-rewind specified, check pg_rewind can be used, and * pre-emptively fetch the list of configuration files which should be @@ -3544,8 +3548,8 @@ do_standby_switchover(void) log_debug("minimum of %i free slots (%i for siblings) required; %i available", min_required_free_slots, - reachable_sibling_nodes_with_slot_count - , available_slots); + reachable_sibling_nodes_with_slot_count, + available_slots); if (available_slots < min_required_free_slots) { @@ -3575,6 +3579,147 @@ do_standby_switchover(void) } } + /* + * Attempt to pause all repmgrd instances, unless user explicitly + * specifies not to. 
+ */ + if (runtime_options.repmgrd_no_pause == false) + { + NodeInfoListCell *cell = NULL; + ItemList repmgrd_connection_errors = {NULL, NULL}; + int i = 0; + int unreachable_node_count = 0; + + get_all_node_records(local_conn, &all_nodes); + + repmgrd_info = (RepmgrdInfo **) pg_malloc0(sizeof(RepmgrdInfo *) * all_nodes.node_count); + + for (cell = all_nodes.head; cell; cell = cell->next) + { + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); + + repmgrd_info[i] = pg_malloc0(sizeof(RepmgrdInfo)); + repmgrd_info[i]->node_id = cell->node_info->node_id; + repmgrd_info[i]->pid = UNKNOWN_PID; + repmgrd_info[i]->paused = false; + repmgrd_info[i]->running = false; + + if (PQstatus(cell->node_info->conn) != CONNECTION_OK) + { + /* + * unable to connect; treat this as an error + */ + + repmgrd_info[i]->pg_running = false; + + item_list_append_format(&repmgrd_connection_errors, + _("unable to connect to node \"%s\" (ID %i)"), + cell->node_info->node_name, + cell->node_info->node_id); + + unreachable_node_count++; + continue; + } + + repmgrd_info[i]->running = repmgrd_is_running(cell->node_info->conn); + repmgrd_info[i]->pid = repmgrd_get_pid(cell->node_info->conn); + repmgrd_info[i]->paused = repmgrd_is_paused(cell->node_info->conn); + + if (repmgrd_info[i]->running == true) + repmgrd_running_count++; + + i++; + } + + if (unreachable_node_count > 0) + { + PQExpBufferData msg; + PQExpBufferData detail; + ItemListCell *cell; + + initPQExpBuffer(&msg); + appendPQExpBuffer(&msg, + _("unable to connect to %i node(s), unable to pause all repmgrd instances"), + unreachable_node_count); + + initPQExpBuffer(&detail); + + for (cell = repmgrd_connection_errors.head; cell; cell = cell->next) + { + appendPQExpBuffer(&detail, + " %s\n", + cell->string); + } + + + if (runtime_options.force == false) + { + log_error("%s", msg.data); + } + else + { + log_warning("%s", msg.data); + } + + log_detail(_("following node(s) unreachable:\n%s"), detail.data); + + 
termPQExpBuffer(&msg); + termPQExpBuffer(&detail); + + /* tell user about footgun */ + if (runtime_options.force == false) + { + log_hint(_("use -F/--force to continue anyway")); + + clear_node_info_list(&sibling_nodes); + clear_node_info_list(&all_nodes); + + exit(ERR_SWITCHOVER_FAIL); + } + + } + + if (repmgrd_running_count > 0) + { + i = 0; + for (cell = all_nodes.head; cell; cell = cell->next) + { + /* + * Skip if node is already paused. Note we won't unpause these, to + * leave the repmgrd instances in the cluster in the same state they + * were before the switchover. + */ + if (repmgrd_info[i]->paused == true) + { + PQfinish(cell->node_info->conn); + cell->node_info->conn = NULL; + i++; + continue; + } + + if (runtime_options.dry_run == true) + { + log_info(_("would pause repmgrd on node %s (ID %i)"), + cell->node_info->node_name, + cell->node_info->node_id); + } + else + { + /* XXX check result */ + log_debug("pausing repmgrd on node %s (ID %i)", + cell->node_info->node_name, + cell->node_info->node_id); + + (void) repmgrd_pause(cell->node_info->conn, true); + } + + PQfinish(cell->node_info->conn); + cell->node_info->conn = NULL; + i++; + } + } + + } /* * Sanity checks completed - prepare for the switchover @@ -3656,6 +3801,7 @@ do_standby_switchover(void) shutdown_command); clear_node_info_list(&sibling_nodes); + clear_node_info_list(&all_nodes); key_value_list_free(&remote_config_files); return; @@ -3793,7 +3939,7 @@ do_standby_switchover(void) /* - * if pg_rewind is requested, issue a checkpoint immediately after promoting + * If pg_rewind is requested, issue a checkpoint immediately after promoting * the local node, as pg_rewind compares timelines on the basis of the value * in pg_control, which is written at the first checkpoint, which might not * occur immediately. 
@@ -3805,7 +3951,7 @@ do_standby_switchover(void) } /* - * Execute `repmgr node rejoin` to create recovery.conf and start the + * Execute "repmgr node rejoin" to create recovery.conf and start the * remote server. Additionally execute "pg_rewind", if required and * requested. */ @@ -3819,6 +3965,7 @@ do_standby_switchover(void) { log_error(_("new primary diverges from former primary and --force-rewind not provided")); log_hint(_("the former primary will need to be restored manually, or use \"repmgr node rejoin\"")); + termPQExpBuffer(&node_rejoin_options); PQfinish(local_conn); exit(ERR_SWITCHOVER_FAIL); @@ -3875,7 +4022,7 @@ do_standby_switchover(void) if (command_success == false) { - log_error(_("rejoin failed %i"), r); + log_error(_("rejoin failed with error code %i"), r); create_event_notification_extended(local_conn, &config_file_options, @@ -3997,11 +4144,13 @@ do_standby_switchover(void) clear_node_info_list(&sibling_nodes); + + PQfinish(local_conn); /* - * Clean up remote node. It's possible that the standby is still starting up, - * so poll for a while until we get a connection. + * Clean up remote node (primary demoted to standby). It's possible that the node is + * still starting up, so poll for a while until we get a connection. */ for (i = 0; i < config_file_options.standby_reconnect_timeout; i++) @@ -4053,6 +4202,84 @@ do_standby_switchover(void) PQfinish(remote_conn); + /* + * Attempt to unpause all paused repmgrd instances, unless user explicitly + * specifies not to. 
+ */ + if (runtime_options.repmgrd_no_pause == false) + { + if (repmgrd_running_count > 0) + { + ItemList repmgrd_unpause_errors = {NULL, NULL}; + NodeInfoListCell *cell = NULL; + int i = 0; + int error_node_count = 0; + + for (cell = all_nodes.head; cell; cell = cell->next) + { + + if (repmgrd_info[i]->paused == true) + { + log_debug("repmgrd on node %s (ID %i) paused before switchover, not unpausing", + cell->node_info->node_name, + cell->node_info->node_id); + + i++; + continue; + } + + log_debug("unpausing repmgrd on node %s (ID %i)", + cell->node_info->node_name, + cell->node_info->node_id); + + cell->node_info->conn = establish_db_connection_quiet(cell->node_info->conninfo); + + if (PQstatus(cell->node_info->conn) == CONNECTION_OK) + { + if (repmgrd_pause(cell->node_info->conn, false) == false) + { + item_list_append_format(&repmgrd_unpause_errors, + _("unable to unpause node \"%s\" (ID %i)"), + cell->node_info->node_name, + cell->node_info->node_id); + error_node_count++; + } + } + else + { + item_list_append_format(&repmgrd_unpause_errors, + _("unable to connect to node \"%s\" (ID %i)"), + cell->node_info->node_name, + cell->node_info->node_id); + error_node_count++; + } + + i++; + } + + if (error_node_count > 0) + { + PQExpBufferData detail; + ItemListCell *cell; + + for (cell = repmgrd_unpause_errors.head; cell; cell = cell->next) + { + appendPQExpBuffer(&detail, + " %s\n", + cell->string); + } + + log_warning(_("unable to unpause repmgrd on %i node(s)"), + error_node_count); + log_detail(_("errors encountered for following node(s):\n%s"), detail.data); + log_hint(_("check node connection and status; unpause manually with \"repmgr daemon unpause\"")); + + termPQExpBuffer(&detail); + } + } + + clear_node_info_list(&all_nodes); + } if (switchover_success == true) { @@ -6602,6 +6829,7 @@ do_standby_help(void) printf(_(" (9.3 and 9.4 - provide \"pg_rewind\" path)\n")); printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: 
\"%s\")\n"), runtime_options.username); + printf(_(" --repmgrd-no-pause don't pause repmgrd\n")); printf(_(" --siblings-follow have other standbys follow new primary\n")); puts(""); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 55256f56..d2a4aa65 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -97,6 +97,7 @@ typedef struct bool force_rewind_used; char force_rewind_path[MAXPGPATH]; bool siblings_follow; + bool repmgrd_no_pause; /* "node status" options */ bool is_shutdown_cleanly; @@ -156,7 +157,7 @@ typedef struct /* "standby register" options */ \ false, -1, DEFAULT_WAIT_START, \ /* "standby switchover" options */ \ - false, false, "", false, \ + false, false, "", false, false, \ /* "node status" options */ \ false, \ /* "node check" options */ \ @@ -193,6 +194,14 @@ typedef enum } t_server_action; +typedef struct ColHeader +{ + char title[MAXLEN]; + int max_length; + int cur_length; +} ColHeader; + + /* global configuration structures */ extern t_runtime_options runtime_options; @@ -228,7 +237,10 @@ extern void get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGc extern bool remote_command(const char *host, const char *user, const char *command, PQExpBufferData *outputbuf); extern void make_remote_repmgr_path(PQExpBufferData *outputbuf, t_node_info *remote_node_record); + +/* display functions */ extern void print_help_header(void); +extern void print_status_header(int cols, ColHeader *headers); /* server control functions */ extern void get_server_action(t_server_action action, char *script, char *data_dir); diff --git a/repmgr-client.c b/repmgr-client.c index daed411a..0395bc9d 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -30,10 +30,15 @@ * NODE STATUS * NODE CHECK * + * DAEMON STATUS + * DAEMON PAUSE + * DAEMON UNPAUSE + * * For internal use: * NODE REJOIN * NODE SERVICE * + * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public 
License as published by * the Free Software Foundation, either version 3 of the License, or @@ -62,6 +67,7 @@ #include "repmgr-action-bdr.h" #include "repmgr-action-node.h" #include "repmgr-action-cluster.h" +#include "repmgr-action-daemon.h" #include /* for PG_TEMP_FILE_PREFIX */ @@ -438,6 +444,10 @@ main(int argc, char **argv) runtime_options.siblings_follow = true; break; + case OPT_REPMGRD_NO_PAUSE: + runtime_options.repmgrd_no_pause = true; + break; + /*---------------------- * "node status" options *---------------------- @@ -900,6 +910,21 @@ main(int argc, char **argv) else if (strcasecmp(repmgr_action, "CLEANUP") == 0) action = CLUSTER_CLEANUP; } + else if (strcasecmp(repmgr_command, "DAEMON") == 0) + { + if (help_option == true) + { + do_daemon_help(); + exit(SUCCESS); + } + + if (strcasecmp(repmgr_action, "STATUS") == 0) + action = DAEMON_STATUS; + else if (strcasecmp(repmgr_action, "PAUSE") == 0) + action = DAEMON_PAUSE; + else if (strcasecmp(repmgr_action, "UNPAUSE") == 0) + action = DAEMON_UNPAUSE; + } else { valid_repmgr_command_found = false; @@ -1298,6 +1323,17 @@ main(int argc, char **argv) do_cluster_cleanup(); break; + /* DAEMON */ + case DAEMON_STATUS: + do_daemon_status(); + break; + case DAEMON_PAUSE: + do_daemon_pause(); + break; + case DAEMON_UNPAUSE: + do_daemon_unpause(); + break; + default: /* An action will have been determined by this point */ break; @@ -1744,6 +1780,18 @@ check_cli_parameters(const int action) } } + if (runtime_options.repmgrd_no_pause == true) + { + switch (action) + { + case STANDBY_SWITCHOVER: + break; + default: + item_list_append_format(&cli_warnings, + _("--repmgrd-no-pause will be ignored when executing %s"), + action_name(action)); + } + } if (runtime_options.config_files[0] != '\0') { @@ -1772,6 +1820,8 @@ check_cli_parameters(const int action) case WITNESS_UNREGISTER: case NODE_REJOIN: case NODE_SERVICE: + case DAEMON_PAUSE: + case DAEMON_UNPAUSE: break; default: item_list_append_format(&cli_warnings, @@ 
-1851,6 +1901,14 @@ action_name(const int action) return "CLUSTER MATRIX"; case CLUSTER_CROSSCHECK: return "CLUSTER CROSSCHECK"; + + case DAEMON_STATUS: + return "DAEMON STATUS"; + case DAEMON_PAUSE: + return "DAEMON PAUSE"; + case DAEMON_UNPAUSE: + return "DAEMON UNPAUSE"; + } return "UNKNOWN ACTION"; @@ -1878,6 +1936,42 @@ print_error_list(ItemList *error_list, int log_level) } +void +print_status_header(int cols, ColHeader *headers) +{ + int i; + + for (i = 0; i < cols; i++) + { + if (i == 0) + printf(" "); + else + printf(" | "); + + printf("%-*s", + headers[i].max_length, + headers[i].title); + } + printf("\n"); + printf("-"); + + for (i = 0; i < cols; i++) + { + int j; + + for (j = 0; j < headers[i].max_length; j++) + printf("-"); + + if (i < (cols - 1)) + printf("-+-"); + else + printf("-"); + } + + printf("\n"); +} + + void print_help_header(void) { @@ -3021,4 +3115,3 @@ drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name) } } } - diff --git a/repmgr-client.h b/repmgr-client.h index 714a560c..c80fb673 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -45,6 +45,9 @@ #define CLUSTER_MATRIX 19 #define CLUSTER_CROSSCHECK 20 #define CLUSTER_EVENT 21 +#define DAEMON_STATUS 22 +#define DAEMON_PAUSE 23 +#define DAEMON_UNPAUSE 24 /* command line options without short versions */ #define OPT_HELP 1001 @@ -88,6 +91,7 @@ #define OPT_RECOVERY_CONF_ONLY 1039 #define OPT_NO_WAIT 1040 #define OPT_MISSING_SLOTS 1041 +#define OPT_REPMGRD_NO_PAUSE 1042 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 @@ -156,6 +160,7 @@ static struct option long_options[] = */ {"always-promote", no_argument, NULL, OPT_ALWAYS_PROMOTE}, {"siblings-follow", no_argument, NULL, OPT_SIBLINGS_FOLLOW}, + {"repmgrd-no-pause", no_argument, NULL, OPT_REPMGRD_NO_PAUSE}, /* "node status" options */ {"is-shutdown-cleanly", no_argument, NULL, OPT_IS_SHUTDOWN_CLEANLY}, diff --git a/repmgr.c b/repmgr.c index 80fb0fe7..4461a354 100644 --- a/repmgr.c +++ b/repmgr.c @@ -26,6 +26,7 
@@ #include "access/xlog.h" #include "miscadmin.h" #include "replication/walreceiver.h" +#include "storage/fd.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/procarray.h" @@ -43,14 +44,21 @@ #include "lib/stringinfo.h" #include "access/xact.h" #include "utils/snapmgr.h" -#include "pgstat.h" +#if (PG_VERSION_NUM >= 90400) +#include "pgstat.h" +#else +#define PGSTAT_STAT_PERMANENT_DIRECTORY "pg_stat" +#endif #include "voting.h" #define UNKNOWN_NODE_ID -1 +#define UNKNOWN_PID -1 #define TRANCHE_NAME "repmgrd" +#define REPMGRD_STATE_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/repmgrd_state.txt" + PG_MODULE_MAGIC; @@ -66,6 +74,9 @@ typedef struct repmgrdSharedState LWLockId lock; /* protects search/modification */ TimestampTz last_updated; int local_node_id; + int repmgrd_pid; + char repmgrd_pidfile[MAXPGPATH]; + bool repmgrd_paused; /* streaming failover */ NodeVotingStatus voting_status; int current_electoral_term; @@ -112,6 +123,25 @@ PG_FUNCTION_INFO_V1(am_bdr_failover_handler); Datum unset_bdr_failover_handler(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(unset_bdr_failover_handler); +Datum set_repmgrd_pid(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(set_repmgrd_pid); + +Datum get_repmgrd_pid(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(get_repmgrd_pid); + +Datum get_repmgrd_pidfile(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(get_repmgrd_pidfile); + +Datum repmgrd_is_running(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(repmgrd_is_running); + +Datum repmgrd_pause(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(repmgrd_pause); + +Datum repmgrd_is_paused(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(repmgrd_is_paused); + + /* * Module load callback @@ -185,6 +215,9 @@ repmgr_shmem_startup(void) #endif shared_state->local_node_id = UNKNOWN_NODE_ID; + shared_state->repmgrd_pid = UNKNOWN_PID; + memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH); + shared_state->repmgrd_paused = false; shared_state->current_electoral_term = 0; shared_state->voting_status = VS_NO_VOTE; 
shared_state->candidate_node_id = UNKNOWN_NODE_ID; @@ -204,6 +237,8 @@ Datum set_local_node_id(PG_FUNCTION_ARGS) { int local_node_id = UNKNOWN_NODE_ID; + int stored_node_id = UNKNOWN_NODE_ID; + int paused = -1; if (!shared_state) PG_RETURN_NULL(); @@ -213,6 +248,34 @@ set_local_node_id(PG_FUNCTION_ARGS) local_node_id = PG_GETARG_INT32(0); + /* read state file and if exists/valid, update "repmgrd_paused" */ + { + FILE *file = NULL; + + file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_R); + + if (file != NULL) + { + int buffer_size = 128; + char buffer[buffer_size]; + + if (fgets(buffer, buffer_size, file) != NULL) + { + if (sscanf(buffer, "%i:%i", &stored_node_id, &paused) != 2) + { + elog(WARNING, "unable to parse repmgrd state file"); + } + else + { + elog(DEBUG1, "node_id: %i; paused: %i", stored_node_id, paused); + } + } + + FreeFile(file); + } + + } + LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); /* only set local_node_id once, as it should never change */ @@ -221,6 +284,19 @@ set_local_node_id(PG_FUNCTION_ARGS) shared_state->local_node_id = local_node_id; } + /* only update if state file valid */ + if (stored_node_id == shared_state->local_node_id) + { + if (paused == 0) + { + shared_state->repmgrd_paused = false; + } + else if (paused == 1) + { + shared_state->repmgrd_paused = true; + } + } + LWLockRelease(shared_state->lock); PG_RETURN_VOID(); @@ -422,3 +498,185 @@ unset_bdr_failover_handler(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + + +/* + * Returns the repmgrd pid; or NULL if none set; or -1 if set but repmgrd + * process not running (TODO!) 
+ */ +Datum +get_repmgrd_pid(PG_FUNCTION_ARGS) +{ + int repmgrd_pid = UNKNOWN_PID; + + if (!shared_state) + PG_RETURN_NULL(); + + LWLockAcquire(shared_state->lock, LW_SHARED); + repmgrd_pid = shared_state->repmgrd_pid; + LWLockRelease(shared_state->lock); + + PG_RETURN_INT32(repmgrd_pid); +} + + +/* + * Returns the repmgrd pidfile + */ +Datum +get_repmgrd_pidfile(PG_FUNCTION_ARGS) +{ + char repmgrd_pidfile[MAXPGPATH]; + + if (!shared_state) + PG_RETURN_NULL(); + + memset(repmgrd_pidfile, 0, MAXPGPATH); + + LWLockAcquire(shared_state->lock, LW_SHARED); + strncpy(repmgrd_pidfile, shared_state->repmgrd_pidfile, MAXPGPATH); + LWLockRelease(shared_state->lock); + + if (repmgrd_pidfile[0] == '\0') + PG_RETURN_NULL(); + + PG_RETURN_TEXT_P(cstring_to_text(repmgrd_pidfile)); +} + +Datum +set_repmgrd_pid(PG_FUNCTION_ARGS) +{ + int repmgrd_pid = UNKNOWN_PID; + char *repmgrd_pidfile = NULL; + + if (!shared_state) + PG_RETURN_VOID(); + + if (PG_ARGISNULL(0)) + { + repmgrd_pid = UNKNOWN_PID; + } + else + { + repmgrd_pid = PG_GETARG_INT32(0); + } + + elog(DEBUG3, "set_repmgrd_pid(): provided pid is %i", repmgrd_pid); + + if (repmgrd_pid != UNKNOWN_PID && !PG_ARGISNULL(1)) + { + repmgrd_pidfile = text_to_cstring(PG_GETARG_TEXT_PP(1)); + elog(INFO, "set_repmgrd_pid(): provided pidfile is %s", repmgrd_pidfile); + } + + LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); + + shared_state->repmgrd_pid = repmgrd_pid; + memset(shared_state->repmgrd_pidfile, 0, MAXPGPATH); + + if(repmgrd_pidfile != NULL) + { + strncpy(shared_state->repmgrd_pidfile, repmgrd_pidfile, MAXPGPATH); + } + + LWLockRelease(shared_state->lock); + PG_RETURN_VOID(); +} + + +Datum +repmgrd_is_running(PG_FUNCTION_ARGS) +{ + int repmgrd_pid = UNKNOWN_PID; + int kill_ret; + + if (!shared_state) + PG_RETURN_NULL(); + + LWLockAcquire(shared_state->lock, LW_SHARED); + repmgrd_pid = shared_state->repmgrd_pid; + LWLockRelease(shared_state->lock); + + /* No PID registered - assume not running */ + if (repmgrd_pid == 
UNKNOWN_PID) + { + PG_RETURN_BOOL(false); + } + + kill_ret = kill(repmgrd_pid, 0); + + if (kill_ret == 0) + { + PG_RETURN_BOOL(true); + } + + PG_RETURN_BOOL(false); +} + + +Datum +repmgrd_pause(PG_FUNCTION_ARGS) +{ + bool pause; + FILE *file = NULL; + StringInfoData buf; + + if (!shared_state) + PG_RETURN_NULL(); + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + pause = PG_GETARG_BOOL(0); + + LWLockAcquire(shared_state->lock, LW_EXCLUSIVE); + shared_state->repmgrd_paused = pause; + LWLockRelease(shared_state->lock); + + /* write state to file */ + file = AllocateFile(REPMGRD_STATE_FILE, PG_BINARY_W); + + if (file == NULL) + { + elog(DEBUG1, "unable to allocate %s", REPMGRD_STATE_FILE); + + // XXX anything else we can do? log? + PG_RETURN_VOID(); + } + + elog(DEBUG1, "allocated"); + + initStringInfo(&buf); + + LWLockAcquire(shared_state->lock, LW_SHARED); + + appendStringInfo(&buf, "%i:%i", + shared_state->local_node_id, + pause ? 1 : 0); + LWLockRelease(shared_state->lock); + + // XXX check success + fwrite(buf.data, strlen(buf.data) + 1, 1, file); + + + resetStringInfo(&buf); + FreeFile(file); + + PG_RETURN_VOID(); +} + + +Datum +repmgrd_is_paused(PG_FUNCTION_ARGS) +{ + bool is_paused; + + if (!shared_state) + PG_RETURN_NULL(); + + LWLockAcquire(shared_state->lock, LW_SHARED); + is_paused = shared_state->repmgrd_paused; + LWLockRelease(shared_state->lock); + + PG_RETURN_BOOL(is_paused); +} diff --git a/repmgr.h b/repmgr.h index 8bf4ec4f..21a1e067 100644 --- a/repmgr.h +++ b/repmgr.h @@ -53,6 +53,7 @@ #define UNKNOWN_TIMELINE_ID -1 #define UNKNOWN_SYSTEM_IDENTIFIER 0 +#define UNKNOWN_PID -1 #define NODE_NOT_FOUND -1 #define NO_UPSTREAM_NODE -1 diff --git a/repmgrd-physical.c b/repmgrd-physical.c index ec1e0682..a05cc614 100644 --- a/repmgrd-physical.c +++ b/repmgrd-physical.c @@ -106,12 +106,13 @@ handle_sigint_physical(SIGNAL_ARGS) else writeable_conn = primary_conn; - create_event_notification(writeable_conn, - &config_file_options, - config_file_options.node_id, 
- "repmgrd_shutdown", - true, - event_details.data); + if (PQstatus(writeable_conn) == CONNECTION_OK) + create_event_notification(writeable_conn, + &config_file_options, + config_file_options.node_id, + "repmgrd_shutdown", + true, + event_details.data); termPQExpBuffer(&event_details); @@ -145,7 +146,6 @@ do_physical_node_check(void) case FAILOVER_AUTOMATIC: log_error(_("this node is marked as inactive and cannot be used as a failover target")); log_hint(_("%s"), hint); - close_connection(&local_conn); create_event_notification(NULL, &config_file_options, @@ -206,8 +206,7 @@ do_physical_node_check(void) if (required_param_missing == true) { log_hint(_("add the missing configuration parameter(s) and start repmgrd again")); - close_connection(&local_conn); - exit(ERR_BAD_CONFIG); + terminate(ERR_BAD_CONFIG); } } } @@ -339,6 +338,7 @@ monitor_streaming_primary(void) if (stored_local_node_id == UNKNOWN_NODE_ID) { repmgrd_set_local_node_id(local_conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); } goto loop; @@ -606,8 +606,7 @@ monitor_streaming_standby(void) if (local_node_info.upstream_node_id == NODE_NOT_FOUND) { log_error(_("unable to determine an active primary for this cluster, terminating")); - close_connection(&local_conn); - exit(ERR_BAD_CONFIG); + terminate(ERR_BAD_CONFIG); } } @@ -623,15 +622,15 @@ monitor_streaming_standby(void) log_error(_("no record found for upstream node (ID: %i), terminating"), local_node_info.upstream_node_id); log_hint(_("ensure the upstream node is registered correctly")); - close_connection(&local_conn); - exit(ERR_DB_CONN); + + terminate(ERR_DB_CONN); } else if (record_status == RECORD_ERROR) { log_error(_("unable to retrieve record for upstream node (ID: %i), terminating"), local_node_info.upstream_node_id); - close_connection(&local_conn); - exit(ERR_DB_CONN); + + terminate(ERR_DB_CONN); } log_debug("connecting to upstream node %i: \"%s\"", upstream_node_info.node_id, 
upstream_node_info.conninfo); @@ -650,8 +649,7 @@ monitor_streaming_standby(void) local_node_info.upstream_node_id); log_hint(_("upstream node must be running before repmgrd can start")); - close_connection(&local_conn); - exit(ERR_DB_CONN); + terminate(ERR_DB_CONN); } /* @@ -673,7 +671,8 @@ monitor_streaming_standby(void) { log_error(_("unable to connect to primary node")); log_hint(_("ensure the primary node is reachable from this node")); - exit(ERR_DB_CONN); + + terminate(ERR_DB_CONN); } log_verbose(LOG_DEBUG, "connected to primary"); @@ -799,28 +798,40 @@ monitor_streaming_standby(void) goto loop; } - /* still down after reconnect attempt(s) */ + + /* upstream is still down after reconnect attempt(s) */ if (upstream_node_info.node_status == NODE_STATUS_DOWN) { bool failover_done = false; - if (upstream_node_info.type == PRIMARY) + if (PQstatus(local_conn) == CONNECTION_OK && repmgrd_is_paused(local_conn)) { - failover_done = do_primary_failover(); + log_notice(_("repmgrd on this node is paused")); + log_detail(_("no failover will be carried out")); + log_hint(_("execute \"repmgr daemon unpause\" to resume normal failover mode")); + monitoring_state = MS_DEGRADED; + INSTR_TIME_SET_CURRENT(degraded_monitoring_start); } - else if (upstream_node_info.type == STANDBY) + else { - failover_done = do_upstream_standby_failover(); - } + if (upstream_node_info.type == PRIMARY) + { + failover_done = do_primary_failover(); + } + else if (upstream_node_info.type == STANDBY) + { + failover_done = do_upstream_standby_failover(); + } - /* - * XXX it's possible it will make sense to return in all - * cases to restart monitoring - */ - if (failover_done == true) - { - primary_node_id = get_primary_node_id(local_conn); - return; + /* + * XXX it's possible it will make sense to return in all + * cases to restart monitoring + */ + if (failover_done == true) + { + primary_node_id = get_primary_node_id(local_conn); + return; + } } } } @@ -990,7 +1001,7 @@ 
monitor_streaming_standby(void) } - if (config_file_options.failover == FAILOVER_AUTOMATIC) + if (config_file_options.failover == FAILOVER_AUTOMATIC && repmgrd_is_paused(local_conn) == false) { get_active_sibling_node_records(local_conn, local_node_info.node_id, @@ -1066,7 +1077,15 @@ loop: termPQExpBuffer(&monitoring_summary); if (monitoring_state == MS_DEGRADED && config_file_options.failover == FAILOVER_AUTOMATIC) { - log_detail(_("waiting for upstream or another primary to reappear")); + if (PQstatus(local_conn) == CONNECTION_OK && repmgrd_is_paused(local_conn)) + { + log_detail(_("repmgrd paused by administrator")); + log_hint(_("execute \"repmgr daemon unpause\" to resume normal failover mode")); + } + else + { + log_detail(_("waiting for upstream or another primary to reappear")); + } } else if (config_file_options.monitoring_history == true) { @@ -1195,6 +1214,7 @@ loop: if (stored_local_node_id == UNKNOWN_NODE_ID) { repmgrd_set_local_node_id(local_conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); } } } @@ -1247,8 +1267,7 @@ monitor_streaming_witness(void) upstream_node_info.node_id); log_hint(_("primary node must be running before repmgrd can start")); - close_connection(&local_conn); - exit(ERR_DB_CONN); + terminate(ERR_DB_CONN); } /* synchronise local copy of "repmgr.nodes", in case it was stale */ @@ -1561,6 +1580,7 @@ loop: if (stored_local_node_id == UNKNOWN_NODE_ID) { repmgrd_set_local_node_id(local_conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); } } } @@ -2094,6 +2114,7 @@ do_upstream_standby_failover(void) /* refresh shared memory settings which will have been zapped by the restart */ repmgrd_set_local_node_id(local_conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); /* * @@ -2564,6 +2585,7 @@ follow_new_primary(int new_primary_id) /* refresh shared memory settings which will have been zapped by the restart */ 
repmgrd_set_local_node_id(local_conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); initPQExpBuffer(&event_details); appendPQExpBuffer(&event_details, @@ -3088,6 +3110,7 @@ check_connection(t_node_info *node_info, PGconn **conn) if (stored_local_node_id == UNKNOWN_NODE_ID) { repmgrd_set_local_node_id(*conn, config_file_options.node_id); + repmgrd_set_pid(local_conn, getpid(), pid_file); } } diff --git a/repmgrd.c b/repmgrd.c index 80ed5a79..06eba3ec 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -35,7 +35,7 @@ static char *config_file = NULL; static bool verbose = false; -static char pid_file[MAXPGPATH]; +char pid_file[MAXPGPATH]; static bool daemonize = true; static bool show_pid_file = false; static bool no_pid_file = false; @@ -488,6 +488,9 @@ main(int argc, char **argv) check_and_create_pid_file(pid_file); } + repmgrd_set_pid(local_conn, getpid(), pid_file); + + #ifndef WIN32 setup_event_handlers(); #endif @@ -901,6 +904,9 @@ print_monitoring_state(MonitoringState monitoring_state) void terminate(int retval) { + if (PQstatus(local_conn) == CONNECTION_OK) + repmgrd_set_pid(local_conn, UNKNOWN_PID, NULL); + logger_shutdown(); if (pid_file[0] != '\0') diff --git a/repmgrd.h b/repmgrd.h index 0f8f3706..144ec9e8 100644 --- a/repmgrd.h +++ b/repmgrd.h @@ -20,6 +20,7 @@ extern t_configuration_options config_file_options; extern t_node_info local_node_info; extern PGconn *local_conn; extern bool startup_event_logged; +extern char pid_file[MAXPGPATH]; void try_reconnect(PGconn **conn, t_node_info *node_info);