mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
Terminate repmgrd if standby is no longer connected to upstream
This commit is contained in:
29
README.md
29
README.md
@@ -355,6 +355,7 @@ Following event types currently exist:
|
||||
standby_promote
|
||||
witness_create
|
||||
repmgrd_start
|
||||
repmgrd_monitor
|
||||
repmgrd_failover_promote
|
||||
repmgrd_failover_follow
|
||||
|
||||
@@ -585,20 +586,20 @@ and one view:
|
||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
||||
exit:
|
||||
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||
|
||||
* SUCCESS (0) Program ran successfully.
|
||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||
* ERR_DB_QUERY (7) Error while executing a database query
|
||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
||||
* ERR_STR_OVERFLOW (10) String overflow error
|
||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||
|
||||
Support and Assistance
|
||||
----------------------
|
||||
|
||||
@@ -36,5 +36,6 @@
|
||||
#define ERR_SYS_FAILURE 13
|
||||
#define ERR_BAD_BASEBACKUP 14
|
||||
#define ERR_INTERNAL 15
|
||||
#define ERR_MONITORING_FAIL 16
|
||||
|
||||
#endif /* _ERRCODE_H_ */
|
||||
|
||||
43
repmgrd.c
43
repmgrd.c
@@ -677,6 +677,7 @@ standby_monitor(void)
|
||||
char last_wal_standby_received[MAXLEN];
|
||||
char last_wal_standby_applied[MAXLEN];
|
||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||
bool last_wal_standby_received_gte_applied;
|
||||
char sqlquery[QUERY_STR_LEN];
|
||||
|
||||
XLogRecPtr lsn_master;
|
||||
@@ -956,7 +957,8 @@ standby_monitor(void)
|
||||
/* Get local xlog info */
|
||||
sqlquery_snprintf(sqlquery,
|
||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp() ");
|
||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
|
||||
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
|
||||
|
||||
res = PQexec(my_local_conn, sqlquery);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
@@ -971,8 +973,47 @@ standby_monitor(void)
|
||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||
|
||||
last_wal_standby_received_gte_applied = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||
? true
|
||||
: false;
|
||||
|
||||
PQclear(res);
|
||||
|
||||
/*
|
||||
* Check that last WAL received is greater than or equal to last WAL applied
|
||||
*
|
||||
* This check can fail when the standby is no longer connected to
|
||||
* the upstream node; in this case repmgrd should terminate itself
|
||||
* as the node may no longer be capable of being promoted or following
|
||||
* a new upstream node
|
||||
*
|
||||
* XXX check if we should (optionally) adopt other strategies to handle
|
||||
* this situation
|
||||
*/
|
||||
if(last_wal_standby_received_gte_applied == false)
|
||||
{
|
||||
PQExpBufferData errmsg;
|
||||
initPQExpBuffer(&errmsg);
|
||||
|
||||
appendPQExpBuffer(&errmsg,
|
||||
/* XXX improve message */
|
||||
_("This node is no longer connected to its upstream node - terminating"));
|
||||
|
||||
log_crit("%s\n", errmsg.data);
|
||||
|
||||
create_event_record(master_conn,
|
||||
&local_options,
|
||||
local_options.node,
|
||||
"repmgrd_monitor",
|
||||
false,
|
||||
errmsg.data);
|
||||
|
||||
// XXX use better code
|
||||
terminate(ERR_MONITORING_FAIL);
|
||||
}
|
||||
|
||||
|
||||
/* Get master xlog info */
|
||||
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user