mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 08:06:29 +00:00
Terminate repmgrd if standby is no longer connected to upstream
This commit is contained in:
29
README.md
29
README.md
@@ -355,6 +355,7 @@ Following event types currently exist:
|
|||||||
standby_promote
|
standby_promote
|
||||||
witness_create
|
witness_create
|
||||||
repmgrd_start
|
repmgrd_start
|
||||||
|
repmgrd_monitor
|
||||||
repmgrd_failover_promote
|
repmgrd_failover_promote
|
||||||
repmgrd_failover_follow
|
repmgrd_failover_follow
|
||||||
|
|
||||||
@@ -585,20 +586,20 @@ and one view:
|
|||||||
`repmgr` or `repmgrd` will return one of the following error codes on program
|
`repmgr` or `repmgrd` will return one of the following error codes on program
|
||||||
exit:
|
exit:
|
||||||
|
|
||||||
* SUCCESS (0) Program ran successfully.
|
* SUCCESS (0) Program ran successfully.
|
||||||
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
* ERR_BAD_CONFIG (1) Configuration file could not be parsed or was invalid
|
||||||
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
* ERR_BAD_RSYNC (2) An rsync call made by the program returned an error
|
||||||
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
* ERR_NO_RESTART (4) An attempt to restart a PostgreSQL instance failed
|
||||||
* ERR_DB_CON (6) Error when trying to connect to a database
|
* ERR_DB_CON (6) Error when trying to connect to a database
|
||||||
* ERR_DB_QUERY (7) Error while executing a database query
|
* ERR_DB_QUERY (7) Error while executing a database query
|
||||||
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
* ERR_PROMOTED (8) Exiting program because the node has been promoted to master
|
||||||
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
* ERR_BAD_PASSWORD (9) Password used to connect to a database was rejected
|
||||||
* ERR_STR_OVERFLOW (10) String overflow error
|
* ERR_STR_OVERFLOW (10) String overflow error
|
||||||
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
* ERR_FAILOVER_FAIL (11) Error encountered during failover (repmgrd only)
|
||||||
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
* ERR_BAD_SSH (12) Error when connecting to remote host via SSH
|
||||||
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
* ERR_SYS_FAILURE (13) Error when forking (repmgrd only)
|
||||||
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
* ERR_BAD_BASEBACKUP (14) Error when executing pg_basebackup
|
||||||
|
* ERR_MONITORING_FAIL (16) Unrecoverable error encountered during monitoring (repmgrd only)
|
||||||
|
|
||||||
Support and Assistance
|
Support and Assistance
|
||||||
----------------------
|
----------------------
|
||||||
|
|||||||
@@ -36,5 +36,6 @@
|
|||||||
#define ERR_SYS_FAILURE 13
|
#define ERR_SYS_FAILURE 13
|
||||||
#define ERR_BAD_BASEBACKUP 14
|
#define ERR_BAD_BASEBACKUP 14
|
||||||
#define ERR_INTERNAL 15
|
#define ERR_INTERNAL 15
|
||||||
|
#define ERR_MONITORING_FAIL 16
|
||||||
|
|
||||||
#endif /* _ERRCODE_H_ */
|
#endif /* _ERRCODE_H_ */
|
||||||
|
|||||||
43
repmgrd.c
43
repmgrd.c
@@ -677,6 +677,7 @@ standby_monitor(void)
|
|||||||
char last_wal_standby_received[MAXLEN];
|
char last_wal_standby_received[MAXLEN];
|
||||||
char last_wal_standby_applied[MAXLEN];
|
char last_wal_standby_applied[MAXLEN];
|
||||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||||
|
bool last_wal_standby_received_gte_applied;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
XLogRecPtr lsn_master;
|
XLogRecPtr lsn_master;
|
||||||
@@ -956,7 +957,8 @@ standby_monitor(void)
|
|||||||
/* Get local xlog info */
|
/* Get local xlog info */
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
"SELECT CURRENT_TIMESTAMP, pg_last_xlog_receive_location(), "
|
||||||
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp() ");
|
"pg_last_xlog_replay_location(), pg_last_xact_replay_timestamp(), "
|
||||||
|
"pg_last_xlog_receive_location() >= pg_last_xlog_replay_location()");
|
||||||
|
|
||||||
res = PQexec(my_local_conn, sqlquery);
|
res = PQexec(my_local_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
@@ -971,8 +973,47 @@ standby_monitor(void)
|
|||||||
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
strncpy(last_wal_standby_received, PQgetvalue(res, 0, 1), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
strncpy(last_wal_standby_applied, PQgetvalue(res, 0, 2), MAXLEN);
|
||||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||||
|
|
||||||
|
last_wal_standby_received_gte_applied = (strcmp(PQgetvalue(res, 0, 4), "t") == 0)
|
||||||
|
? true
|
||||||
|
: false;
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that last WAL received is greater than or equal to last WAL applied
|
||||||
|
*
|
||||||
|
* The reverse (received < applied) can occur when the standby is no longer connected to
|
||||||
|
* the upstream node; in this case repmgrd should terminate itself
|
||||||
|
* as the node may no longer be capable of being promoted or following
|
||||||
|
* a new upstream node
|
||||||
|
*
|
||||||
|
* XXX check if we should (optionally) adopt other strategies to handle
|
||||||
|
* this situation
|
||||||
|
*/
|
||||||
|
if(last_wal_standby_received_gte_applied == false)
|
||||||
|
{
|
||||||
|
PQExpBufferData errmsg;
|
||||||
|
initPQExpBuffer(&errmsg);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&errmsg,
|
||||||
|
/* XXX improve message */
|
||||||
|
_("This node is no longer connected to its upstream node - terminating"));
|
||||||
|
|
||||||
|
log_crit("%s\n", errmsg.data);
|
||||||
|
|
||||||
|
create_event_record(master_conn,
|
||||||
|
&local_options,
|
||||||
|
local_options.node,
|
||||||
|
"repmgrd_monitor",
|
||||||
|
false,
|
||||||
|
errmsg.data);
|
||||||
|
|
||||||
|
// XXX use better code
|
||||||
|
terminate(ERR_MONITORING_FAIL);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Get master xlog info */
|
/* Get master xlog info */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user