diff --git a/dbutils.c b/dbutils.c index bd72c3b0..998c909e 100644 --- a/dbutils.c +++ b/dbutils.c @@ -2095,3 +2095,48 @@ get_data_checksum_version(const char *data_directory) return (int)control_file.data_checksum_version; } + + + +/* ========================== */ +/* backported from repmgr 4.x */ +/* ========================== */ +/* parse_lsn(): convert an LSN written as two hexadecimal numbers separated by a slash into an XLogRecPtr (first number = high 32 bits, second = low 32 bits); returns InvalidXLogRecPtr if str is NULL or is not in that form */ +XLogRecPtr +parse_lsn(const char *str) +{ + XLogRecPtr ptr = InvalidXLogRecPtr; + uint32 high, + low; + + if (str != NULL && sscanf(str, "%x/%x", &high, &low) == 2) + ptr = (((XLogRecPtr) high) << 32) + (XLogRecPtr) low; + + return ptr; +} + +/* get_last_wal_receive_location(): run pg_last_wal_receive_lsn() (server version >= 100000) or pg_last_xlog_receive_location() (older servers) on conn and return the result parsed by parse_lsn(); returns InvalidXLogRecPtr if the query does not return PGRES_TUPLES_OK or the value cannot be parsed, so callers cannot tell a failed query from an unparseable value */ +XLogRecPtr +get_last_wal_receive_location(PGconn *conn) +{ + PGresult *res = NULL; + XLogRecPtr ptr = InvalidXLogRecPtr; + + if (PQserverVersion(conn) >= 100000) + { + res = PQexec(conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()"); + } + else + { + res = PQexec(conn, "SELECT pg_catalog.pg_last_xlog_receive_location()"); + } + + if (PQresultStatus(res) == PGRES_TUPLES_OK) + { + ptr = parse_lsn(PQgetvalue(res, 0, 0)); + } + + PQclear(res); + + return ptr; +} diff --git a/dbutils.h b/dbutils.h index bc3cb6bc..9b16877c 100644 --- a/dbutils.h +++ b/dbutils.h @@ -28,6 +28,8 @@ #include "strutil.h" +#define format_lsn(x) (uint32) ((x) >> 32), (uint32) (x) /* expands to TWO comma-separated arguments (high and low 32 bits of x) for a "%X/%X" format; x is evaluated twice, so pass an expression without side effects */ + typedef enum { UNKNOWN = 0, MASTER, @@ -140,4 +142,9 @@ void create_checkpoint(PGconn *conn); int get_node_replication_state(PGconn *conn, char *node_name, char *output); t_server_type parse_node_type(const char *type); int get_data_checksum_version(const char *data_directory); + +/* backported from repmgr 4.x */ +XLogRecPtr parse_lsn(const char *str); +XLogRecPtr get_last_wal_receive_location(PGconn *conn); + #endif diff --git a/repmgr.c b/repmgr.c index dfd8eab9..fa344857 100644 --- a/repmgr.c +++ b/repmgr.c @@ -4201,27 +4201,13 @@ stop_backup: exit(retval); } -static void -parse_lsn(XLogRecPtr *ptr, const char *str) -{ - uint32 high, low; - - if (sscanf(str, "%x/%x", &high, &low) != 2) - return; - - *ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low; - -
return; -} static XLogRecPtr parse_label_lsn(const char *label_key, const char *label_value) { - XLogRecPtr ptr = InvalidXLogRecPtr; + XLogRecPtr ptr = parse_lsn(label_value); - parse_lsn(&ptr, label_value); - - /* parse_lsn() will not modify ptr if it can't parse the label value */ + /* parse_lsn() will return InvalidXLogRecPtr if it can't parse the label value */ if (ptr == InvalidXLogRecPtr) { log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"), diff --git a/repmgrd.c b/repmgrd.c index 62bca54e..683623cc 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -514,6 +514,28 @@ main(int argc, char **argv) else if (node_info.type == STANDBY) { log_info(_("starting continuous standby node monitoring\n")); + + /* + * Call update_shared_memory() so it's not stuck at 0/0; this + * will otherwise cause an infinite loop on other repmgrds if + * this repmgrd does not enter failover. + * + * NOTE: this is a temporary workaround for a structural + * issue resolved through architectural redesign in repmgr 4. + */ + { + PQExpBufferData current_lsn; + + XLogRecPtr last_wal_receive_location = get_last_wal_receive_location(my_local_conn); + + initPQExpBuffer(¤t_lsn); + appendPQExpBuffer(¤t_lsn, "%X/%X", + format_lsn(last_wal_receive_location)); + + update_shared_memory(current_lsn.data); + + termPQExpBuffer(¤t_lsn); + } } do @@ -847,6 +869,8 @@ standby_monitor(void) : "upstream"; } + + /* * Check that the upstream node is still available * If not, initiate failover process @@ -1421,7 +1445,7 @@ do_master_failover(void) * * If the master did come back at this point, the voting algorithm should decide * it's the "best candidate" anyway and no standby will promote itself or - * attempt to follow* another server. + * attempt to follow another server. 
* * If we don't try and connect to the master here (and the code generally * assumes it's failed anyway) but it does come back any time from here @@ -1500,28 +1524,20 @@ do_master_failover(void) terminate(ERR_FAILOVER_FAIL); } - if (server_version_num >= 100000) - sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()"); - else - sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()"); + xlog_recptr = get_last_wal_receive_location(node_conn); - res = PQexec(node_conn, sqlquery); - if (PQresultStatus(res) != PGRES_TUPLES_OK) + if (xlog_recptr == InvalidXLogRecPtr) { log_info(_("unable to retrieve node's last standby location: %s\n"), PQerrorMessage(node_conn)); log_debug(_("connection details: %s\n"), nodes[i].conninfo_str); - PQclear(res); PQfinish(node_conn); terminate(ERR_FAILOVER_FAIL); } - xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok); + log_debug(_("LSN of node %i is: %X/%X\n"), nodes[i].node_id, format_lsn(xlog_recptr)); - log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0)); - - PQclear(res); PQfinish(node_conn); /* If position is 0/0, error */ @@ -1536,7 +1552,6 @@ do_master_failover(void) } /* last we get info about this node, and update shared memory */ - if (server_version_num >= 100000) sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()"); else