mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 23:26:30 +00:00
Compare commits
10 Commits
REL3_3_STA
...
REL3_4_STA
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3988653d6c | ||
|
|
3f9b10a02c | ||
|
|
df34e7e8c5 | ||
|
|
668b2c9b59 | ||
|
|
9629fb6eb5 | ||
|
|
967b7c6876 | ||
|
|
120dd5b82d | ||
|
|
243b5d2b48 | ||
|
|
24a354c0a7 | ||
|
|
a4f572a1ff |
5
HISTORY
5
HISTORY
@@ -1,5 +1,8 @@
|
|||||||
3.3.3 2017-06
|
3.4.0 2019-02-
|
||||||
|
default log level is now INFO (Ian)
|
||||||
repmgr: fix `standby register --force` when updating existing node record (Ian)
|
repmgr: fix `standby register --force` when updating existing node record (Ian)
|
||||||
|
repmgrd: set LSN shared memory value at standby startup (Ian)
|
||||||
|
repmgrd: improve logging during failover (Ian)
|
||||||
|
|
||||||
3.3.2 2017-06-01
|
3.3.2 2017-06-01
|
||||||
Add support for PostgreSQL 10 (Ian)
|
Add support for PostgreSQL 10 (Ian)
|
||||||
|
|||||||
@@ -7,9 +7,13 @@ replication capabilities with utilities to set up standby servers, monitor
|
|||||||
replication, and perform administrative tasks such as failover or switchover
|
replication, and perform administrative tasks such as failover or switchover
|
||||||
operations.
|
operations.
|
||||||
|
|
||||||
The current `repmgr` version (3.3) supports all PostgreSQL versions from
|
This `repmgr` version (3.4) supports PostgreSQL versions from
|
||||||
9.3 to 9.6.
|
9.3 to 9.6.
|
||||||
|
|
||||||
|
*NOTE*: we strongly recommend using the repmgr 4.x series, which contains
|
||||||
|
many new features and usability enhancements and is being actively developed
|
||||||
|
and maintained.
|
||||||
|
|
||||||
Overview
|
Overview
|
||||||
--------
|
--------
|
||||||
|
|
||||||
@@ -210,7 +214,7 @@ See `PACKAGES.md` for details on building .deb and .rpm packages from the
|
|||||||
Release tarballs are also available:
|
Release tarballs are also available:
|
||||||
|
|
||||||
https://github.com/2ndQuadrant/repmgr/releases
|
https://github.com/2ndQuadrant/repmgr/releases
|
||||||
http://repmgr.org/
|
https://repmgr.org/
|
||||||
|
|
||||||
`repmgr` is compiled in the same way as a PostgreSQL extension using the PGXS
|
`repmgr` is compiled in the same way as a PostgreSQL extension using the PGXS
|
||||||
infrastructure, e.g.:
|
infrastructure, e.g.:
|
||||||
|
|||||||
2
config.c
2
config.c
@@ -59,7 +59,7 @@ progname(void)
|
|||||||
* added/changed in reload_config()
|
* added/changed in reload_config()
|
||||||
*
|
*
|
||||||
* NOTE: this function is called before the logger is set up, so we need
|
* NOTE: this function is called before the logger is set up, so we need
|
||||||
* to handle the verbose option ourselves; also the default log level is NOTICE,
|
* to handle the verbose option ourselves; also the default log level is INFO,
|
||||||
* so we can't use DEBUG.
|
* so we can't use DEBUG.
|
||||||
*/
|
*/
|
||||||
bool
|
bool
|
||||||
|
|||||||
66
dbutils.c
66
dbutils.c
@@ -322,8 +322,6 @@ is_standby(PGconn *conn)
|
|||||||
bool
|
bool
|
||||||
is_pgup(PGconn *conn, int timeout)
|
is_pgup(PGconn *conn, int timeout)
|
||||||
{
|
{
|
||||||
char sqlquery[QUERY_STR_LEN];
|
|
||||||
|
|
||||||
/* Check the connection status twice in case it changes after reset */
|
/* Check the connection status twice in case it changes after reset */
|
||||||
bool twice = false;
|
bool twice = false;
|
||||||
|
|
||||||
@@ -346,8 +344,7 @@ is_pgup(PGconn *conn, int timeout)
|
|||||||
if (wait_connection_availability(conn, timeout) != 1)
|
if (wait_connection_availability(conn, timeout) != 1)
|
||||||
goto failed;
|
goto failed;
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT 1");
|
if (PQsendQuery(conn, "SELECT 1") == 0)
|
||||||
if (PQsendQuery(conn, sqlquery) == 0)
|
|
||||||
{
|
{
|
||||||
log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
|
log_warning(_("PQsendQuery: Query could not be sent to primary. %s\n"),
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
@@ -2095,3 +2092,64 @@ get_data_checksum_version(const char *data_directory)
|
|||||||
|
|
||||||
return (int)control_file.data_checksum_version;
|
return (int)control_file.data_checksum_version;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* ========================== */
|
||||||
|
/* backported from repmgr 4.x */
|
||||||
|
/* ========================== */
|
||||||
|
|
||||||
|
XLogRecPtr
|
||||||
|
parse_lsn(const char *str)
|
||||||
|
{
|
||||||
|
XLogRecPtr ptr = InvalidXLogRecPtr;
|
||||||
|
uint32 high,
|
||||||
|
low;
|
||||||
|
|
||||||
|
if (sscanf(str, "%x/%x", &high, &low) == 2)
|
||||||
|
ptr = (((XLogRecPtr) high) << 32) + (XLogRecPtr) low;
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
XLogRecPtr
|
||||||
|
get_last_wal_receive_location(PGconn *conn)
|
||||||
|
{
|
||||||
|
PGresult *res = NULL;
|
||||||
|
XLogRecPtr ptr = InvalidXLogRecPtr;
|
||||||
|
|
||||||
|
if (PQserverVersion(conn) >= 100000)
|
||||||
|
{
|
||||||
|
res = PQexec(conn, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
res = PQexec(conn, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PQresultStatus(res) == PGRES_TUPLES_OK)
|
||||||
|
{
|
||||||
|
ptr = parse_lsn(PQgetvalue(res, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool
|
||||||
|
is_server_available(const char *conninfo)
|
||||||
|
{
|
||||||
|
PGPing status = PQping(conninfo);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
|
||||||
|
|
||||||
|
if (status == PQPING_OK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
log_warning("is_server_available(): ping status for \"%s\" is %i\n", conninfo, (int)status);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|||||||
@@ -28,6 +28,8 @@
|
|||||||
#include "strutil.h"
|
#include "strutil.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * format_lsn(x) expands to TWO comma-separated uint32 arguments - the upper
 * and lower 32 bits of "x" - intended to be passed to a pair of "%X/%X"
 * conversions in a printf-style call.
 *
 * The argument is parenthesized so that an expression such as "a | b" is
 * shifted and cast as a whole. Note that "x" is evaluated twice, so it must
 * not have side effects.
 */
#define format_lsn(x) (uint32) ((x) >> 32), (uint32) (x)
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
UNKNOWN = 0,
|
UNKNOWN = 0,
|
||||||
MASTER,
|
MASTER,
|
||||||
@@ -140,4 +142,10 @@ void create_checkpoint(PGconn *conn);
|
|||||||
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
|
||||||
t_server_type parse_node_type(const char *type);
|
t_server_type parse_node_type(const char *type);
|
||||||
int get_data_checksum_version(const char *data_directory);
|
int get_data_checksum_version(const char *data_directory);
|
||||||
|
|
||||||
|
/* backported from repmgr 4.x */
|
||||||
|
XLogRecPtr parse_lsn(const char *str);
|
||||||
|
XLogRecPtr get_last_wal_receive_location(PGconn *conn);
|
||||||
|
bool is_server_available(const char *conninfo);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
4
log.c
4
log.c
@@ -44,8 +44,8 @@ static void _stderr_log_with_level(const char *level_name, int level, const char
|
|||||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
__attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 0)));
|
||||||
|
|
||||||
int log_type = REPMGR_STDERR;
|
int log_type = REPMGR_STDERR;
|
||||||
int log_level = LOG_NOTICE;
|
int log_level = LOG_INFO;
|
||||||
int last_log_level = LOG_NOTICE;
|
int last_log_level = LOG_INFO;
|
||||||
int verbose_logging = false;
|
int verbose_logging = false;
|
||||||
int terse_logging = false;
|
int terse_logging = false;
|
||||||
/*
|
/*
|
||||||
|
|||||||
26
repmgr.c
26
repmgr.c
@@ -3601,15 +3601,15 @@ do_standby_clone(void)
|
|||||||
/* Only from 9.4 */
|
/* Only from 9.4 */
|
||||||
"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
|
"pg_dynshmem", "pg_logical", "pg_logical/snapshots", "pg_logical/mappings", "pg_replslot",
|
||||||
/* Already in 9.3 */
|
/* Already in 9.3 */
|
||||||
"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp", "pg_tblspc",
|
"pg_notify", "pg_serial", "pg_snapshots", "pg_stat", "pg_stat_tmp",
|
||||||
"pg_twophase", "pg_xlog", 0
|
"pg_subtrans", "pg_tblspc", "pg_twophase", "pg_xlog", 0
|
||||||
};
|
};
|
||||||
const int vers[] = {
|
const int vers[] = {
|
||||||
100000,
|
100000,
|
||||||
90500,
|
90500,
|
||||||
90400, 90400, 90400, 90400, 90400,
|
90400, 90400, 90400, 90400, 90400,
|
||||||
0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0,
|
||||||
0, -100000, 0
|
0, 0, 0, -100000
|
||||||
};
|
};
|
||||||
for (i = 0; dirs[i]; i++)
|
for (i = 0; dirs[i]; i++)
|
||||||
{
|
{
|
||||||
@@ -4201,27 +4201,13 @@ stop_backup:
|
|||||||
exit(retval);
|
exit(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
parse_lsn(XLogRecPtr *ptr, const char *str)
|
|
||||||
{
|
|
||||||
uint32 high, low;
|
|
||||||
|
|
||||||
if (sscanf(str, "%x/%x", &high, &low) != 2)
|
|
||||||
return;
|
|
||||||
|
|
||||||
*ptr = (((XLogRecPtr)high) << 32) + (XLogRecPtr)low;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
static XLogRecPtr
|
static XLogRecPtr
|
||||||
parse_label_lsn(const char *label_key, const char *label_value)
|
parse_label_lsn(const char *label_key, const char *label_value)
|
||||||
{
|
{
|
||||||
XLogRecPtr ptr = InvalidXLogRecPtr;
|
XLogRecPtr ptr = parse_lsn(label_value);
|
||||||
|
|
||||||
parse_lsn(&ptr, label_value);
|
/* parse_lsn() will return InvalidXLogRecPtr if it can't parse the label value */
|
||||||
|
|
||||||
/* parse_lsn() will not modify ptr if it can't parse the label value */
|
|
||||||
if (ptr == InvalidXLogRecPtr)
|
if (ptr == InvalidXLogRecPtr)
|
||||||
{
|
{
|
||||||
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
log_err(_("Couldn't parse backup label entry \"%s: %s\" as lsn"),
|
||||||
|
|||||||
@@ -66,8 +66,8 @@
|
|||||||
# -------------------------------
|
# -------------------------------
|
||||||
|
|
||||||
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
# Log level: possible values are DEBUG, INFO, NOTICE, WARNING, ERR, ALERT, CRIT or EMERG
|
||||||
# (default: NOTICE)
|
# (default: INFO)
|
||||||
#loglevel=NOTICE
|
#loglevel=INFO
|
||||||
|
|
||||||
# Note that logging facility settings will only apply to `repmgrd` by default;
|
# Note that logging facility settings will only apply to `repmgrd` by default;
|
||||||
# `repmgr` will always write to STDERR unless the switch `--log-to-file` is
|
# `repmgr` will always write to STDERR unless the switch `--log-to-file` is
|
||||||
|
|||||||
109
repmgrd.c
109
repmgrd.c
@@ -514,6 +514,33 @@ main(int argc, char **argv)
|
|||||||
else if (node_info.type == STANDBY)
|
else if (node_info.type == STANDBY)
|
||||||
{
|
{
|
||||||
log_info(_("starting continuous standby node monitoring\n"));
|
log_info(_("starting continuous standby node monitoring\n"));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call update_shared_memory() so it's not stuck at 0/0; this
|
||||||
|
* will otherwise cause an infinite loop on other repmgrds if
|
||||||
|
* this repmgrd does not enter failover.
|
||||||
|
*
|
||||||
|
* NOTE: this is a temporary workaround for a structural
|
||||||
|
* issue resolved through architectural redesign in repmgr 4.
|
||||||
|
*/
|
||||||
|
if (local_options.failover == MANUAL_FAILOVER)
|
||||||
|
{
|
||||||
|
update_shared_memory(PASSIVE_NODE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PQExpBufferData current_lsn;
|
||||||
|
|
||||||
|
XLogRecPtr last_wal_receive_location = get_last_wal_receive_location(my_local_conn);
|
||||||
|
|
||||||
|
initPQExpBuffer(¤t_lsn);
|
||||||
|
appendPQExpBuffer(¤t_lsn, "%X/%X",
|
||||||
|
format_lsn(last_wal_receive_location));
|
||||||
|
|
||||||
|
update_shared_memory(current_lsn.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(¤t_lsn);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
do
|
do
|
||||||
@@ -847,6 +874,8 @@ standby_monitor(void)
|
|||||||
: "upstream";
|
: "upstream";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the upstream node is still available
|
* Check that the upstream node is still available
|
||||||
* If not, initiate failover process
|
* If not, initiate failover process
|
||||||
@@ -855,9 +884,7 @@ standby_monitor(void)
|
|||||||
* local_options.reconnect_interval seconds
|
* local_options.reconnect_interval seconds
|
||||||
*/
|
*/
|
||||||
|
|
||||||
check_connection(&upstream_conn, upstream_node_type, upstream_conninfo);
|
if (!check_connection(&upstream_conn, upstream_node_type, upstream_conninfo))
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
|
||||||
{
|
{
|
||||||
int previous_master_node_id = master_options.node;
|
int previous_master_node_id = master_options.node;
|
||||||
|
|
||||||
@@ -1372,7 +1399,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
total_active_nodes = PQntuples(res);
|
total_active_nodes = PQntuples(res);
|
||||||
log_debug(_("%d active nodes registered\n"), total_active_nodes);
|
log_info(_("%d active nodes registered\n"), total_active_nodes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build an array with the nodes and indicate which ones are visible and
|
* Build an array with the nodes and indicate which ones are visible and
|
||||||
@@ -1421,7 +1448,7 @@ do_master_failover(void)
|
|||||||
*
|
*
|
||||||
* If the master did come back at this point, the voting algorithm should decide
|
* If the master did come back at this point, the voting algorithm should decide
|
||||||
* it's the "best candidate" anyway and no standby will promote itself or
|
* it's the "best candidate" anyway and no standby will promote itself or
|
||||||
* attempt to follow* another server.
|
* attempt to follow another server.
|
||||||
*
|
*
|
||||||
* If we don't try and connect to the master here (and the code generally
|
* If we don't try and connect to the master here (and the code generally
|
||||||
* assumes it's failed anyway) but it does come back any time from here
|
* assumes it's failed anyway) but it does come back any time from here
|
||||||
@@ -1455,8 +1482,8 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
log_debug(_("total nodes counted: registered=%d, visible=%d\n"),
|
log_info(_("total nodes counted: registered=%d, visible=%d\n"),
|
||||||
total_active_nodes, visible_nodes);
|
total_active_nodes, visible_nodes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Am I on the group that should keep alive? If I see less than half of
|
* Am I on the group that should keep alive? If I see less than half of
|
||||||
@@ -1473,7 +1500,7 @@ do_master_failover(void)
|
|||||||
/* Query all available nodes to determine readiness and LSN */
|
/* Query all available nodes to determine readiness and LSN */
|
||||||
for (i = 0; i < total_active_nodes; i++)
|
for (i = 0; i < total_active_nodes; i++)
|
||||||
{
|
{
|
||||||
log_debug("checking node %i...\n", nodes[i].node_id);
|
log_info("checking node %i...\n", nodes[i].node_id);
|
||||||
|
|
||||||
/* if the node is not visible, skip it */
|
/* if the node is not visible, skip it */
|
||||||
if (!nodes[i].is_visible)
|
if (!nodes[i].is_visible)
|
||||||
@@ -1497,31 +1524,25 @@ do_master_failover(void)
|
|||||||
if (PQstatus(node_conn) != CONNECTION_OK)
|
if (PQstatus(node_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
log_err(_("It seems new problems are arising, manual intervention is needed\n"));
|
||||||
|
log_detail("%s\n", PQerrorMessage(node_conn));
|
||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (server_version_num >= 100000)
|
xlog_recptr = get_last_wal_receive_location(node_conn);
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
|
||||||
else
|
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_catalog.pg_last_xlog_receive_location()");
|
|
||||||
|
|
||||||
res = PQexec(node_conn, sqlquery);
|
if (xlog_recptr == InvalidXLogRecPtr)
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
|
||||||
{
|
{
|
||||||
log_info(_("unable to retrieve node's last standby location: %s\n"),
|
log_info(_("unable to retrieve last standby location for node %i: %s\n"),
|
||||||
|
nodes[i].node_id,
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
|
|
||||||
log_debug(_("connection details: %s\n"), nodes[i].conninfo_str);
|
log_detail(_("connection details: %s\n"), nodes[i].conninfo_str);
|
||||||
PQclear(res);
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
xlog_recptr = lsn_to_xlogrecptr(PQgetvalue(res, 0, 0), &lsn_format_ok);
|
log_info(_("current LSN of node %i is: %X/%X\n"), nodes[i].node_id, format_lsn(xlog_recptr));
|
||||||
|
|
||||||
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, PQgetvalue(res, 0, 0));
|
|
||||||
|
|
||||||
PQclear(res);
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
|
|
||||||
/* If position is 0/0, error */
|
/* If position is 0/0, error */
|
||||||
@@ -1536,7 +1557,6 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* last we get info about this node, and update shared memory */
|
/* last we get info about this node, and update shared memory */
|
||||||
|
|
||||||
if (server_version_num >= 100000)
|
if (server_version_num >= 100000)
|
||||||
sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
sprintf(sqlquery, "SELECT pg_catalog.pg_last_wal_receive_lsn()");
|
||||||
else
|
else
|
||||||
@@ -1555,6 +1575,9 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
/* write last location in shared memory */
|
/* write last location in shared memory */
|
||||||
update_shared_memory(PQgetvalue(res, 0, 0));
|
update_shared_memory(PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
|
log_info("local node's LSN is %s\n", PQgetvalue(res, 0, 0));
|
||||||
|
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Wait for each node to come up and report a valid LSN */
|
/* Wait for each node to come up and report a valid LSN */
|
||||||
@@ -1591,6 +1614,9 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
if (PQstatus(node_conn) != CONNECTION_OK)
|
if (PQstatus(node_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
|
log_err(_("connection to node %i has gone away:\n%s\n"),
|
||||||
|
nodes[i].node_id,
|
||||||
|
PQerrorMessage(node_conn));
|
||||||
log_info(_("At this point, it could be some race conditions "
|
log_info(_("At this point, it could be some race conditions "
|
||||||
"that are acceptable, assume the node is restarting "
|
"that are acceptable, assume the node is restarting "
|
||||||
"and starting failover procedure\n"));
|
"and starting failover procedure\n"));
|
||||||
@@ -1607,6 +1633,9 @@ do_master_failover(void)
|
|||||||
res = PQexec(node_conn, sqlquery);
|
res = PQexec(node_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Note: in repmgr4 we handle this kind of situation much more gracefully.
|
||||||
|
*/
|
||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not "
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not "
|
||||||
"be considered as new master and exit.\n"),
|
"be considered as new master and exit.\n"),
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
@@ -1639,8 +1668,8 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
if (strcmp(location_value, PASSIVE_NODE) == 0)
|
if (strcmp(location_value, PASSIVE_NODE) == 0)
|
||||||
{
|
{
|
||||||
log_debug("node %i is passive mode\n", nodes[i].node_id);
|
|
||||||
log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
|
log_info(_("node %i will not be considered for promotion\n"), nodes[i].node_id);
|
||||||
|
log_detail("node %i indicates it is a passive node\n", nodes[i].node_id);
|
||||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
continue_loop = false;
|
continue_loop = false;
|
||||||
}
|
}
|
||||||
@@ -1650,7 +1679,8 @@ do_master_failover(void)
|
|||||||
*/
|
*/
|
||||||
else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
|
else if (strcmp(location_value, LSN_QUERY_ERROR) == 0)
|
||||||
{
|
{
|
||||||
log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"), nodes[i].node_id);
|
log_warning(_("node %i is unable to update its shared memory and will not be considered for promotion\n"),
|
||||||
|
nodes[i].node_id);
|
||||||
nodes[i].xlog_location = InvalidXLogRecPtr;
|
nodes[i].xlog_location = InvalidXLogRecPtr;
|
||||||
continue_loop = false;
|
continue_loop = false;
|
||||||
}
|
}
|
||||||
@@ -1658,12 +1688,8 @@ do_master_failover(void)
|
|||||||
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
/* Unable to parse value returned by `repmgr_get_last_standby_location()` */
|
||||||
else if (*location_value == '\0')
|
else if (*location_value == '\0')
|
||||||
{
|
{
|
||||||
log_crit(
|
log_crit(_("unable to obtain LSN from node %i"), nodes[i].node_id);
|
||||||
_("unable to obtain LSN from node %i"), nodes[i].node_id
|
log_hint(_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n"));
|
||||||
);
|
|
||||||
log_hint(
|
|
||||||
_("please check that 'shared_preload_libraries=repmgr_funcs' is set in postgresql.conf\n")
|
|
||||||
);
|
|
||||||
|
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
/* XXX shouldn't we just ignore this node? */
|
/* XXX shouldn't we just ignore this node? */
|
||||||
@@ -1675,14 +1701,14 @@ do_master_failover(void)
|
|||||||
* strategy keep checking
|
* strategy keep checking
|
||||||
*/
|
*/
|
||||||
else {
|
else {
|
||||||
log_warning(_("unable to parse LSN \"%s\"\n"),
|
log_warning(_("unable to parse shared memory LSN \"%s\"\n"),
|
||||||
location_value);
|
location_value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_debug(
|
log_debug(
|
||||||
_("invalid LSN returned from node %i: '%s'\n"),
|
_("invalid shared memory LSN returned from node %i: '%s'\n"),
|
||||||
nodes[i].node_id,
|
nodes[i].node_id,
|
||||||
location_value);
|
location_value);
|
||||||
}
|
}
|
||||||
@@ -1704,7 +1730,7 @@ do_master_failover(void)
|
|||||||
nodes[i].xlog_location = xlog_recptr;
|
nodes[i].xlog_location = xlog_recptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug(_("LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
|
log_info(_("shared memory LSN of node %i is: %s\n"), nodes[i].node_id, location_value);
|
||||||
|
|
||||||
ready_nodes++;
|
ready_nodes++;
|
||||||
nodes[i].is_ready = true;
|
nodes[i].is_ready = true;
|
||||||
@@ -1760,7 +1786,7 @@ do_master_failover(void)
|
|||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug("best candidate node id is %i\n", best_candidate.node_id);
|
log_info("best candidate node id is %i\n", best_candidate.node_id);
|
||||||
|
|
||||||
/* if local node is the best candidate, promote it */
|
/* if local node is the best candidate, promote it */
|
||||||
if (best_candidate.node_id == local_options.node)
|
if (best_candidate.node_id == local_options.node)
|
||||||
@@ -1776,9 +1802,9 @@ do_master_failover(void)
|
|||||||
sleep(5);
|
sleep(5);
|
||||||
|
|
||||||
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
log_notice(_("this node is the best candidate to be the new master, promoting...\n"));
|
||||||
|
log_detail(_("LSN is %X/%X\n"), format_lsn(best_candidate.xlog_location));
|
||||||
log_debug("promote command is: \"%s\"\n",
|
log_info("promote command is: \"%s\"\n",
|
||||||
local_options.promote_command);
|
local_options.promote_command);
|
||||||
|
|
||||||
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
{
|
{
|
||||||
@@ -1834,6 +1860,8 @@ do_master_failover(void)
|
|||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_master.node_id);
|
failed_master.node_id);
|
||||||
|
|
||||||
|
log_notice("%s\n", event_details.data);
|
||||||
|
|
||||||
/* my_local_conn is now the master */
|
/* my_local_conn is now the master */
|
||||||
create_event_record(my_local_conn,
|
create_event_record(my_local_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
@@ -1894,7 +1922,7 @@ do_master_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
log_debug(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
log_notice(_("executing follow command: \"%s\"\n"), local_options.follow_command);
|
||||||
|
|
||||||
r = system(local_options.follow_command);
|
r = system(local_options.follow_command);
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
@@ -2112,8 +2140,11 @@ check_connection(PGconn **conn, const char *type, const char *conninfo)
|
|||||||
{
|
{
|
||||||
int connection_retries;
|
int connection_retries;
|
||||||
|
|
||||||
|
if (conninfo != NULL && is_server_available(conninfo))
|
||||||
|
return true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check if the node is still available if after
|
* Check if the node is still available; if after
|
||||||
* local_options.reconnect_attempts * local_options.reconnect_interval
|
* local_options.reconnect_attempts * local_options.reconnect_interval
|
||||||
* seconds of retries we cannot reconnect return false
|
* seconds of retries we cannot reconnect return false
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user