mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-27 08:56:29 +00:00
On local node failure, attempt to update record on primary server
This commit is contained in:
76
repmgrd.c
76
repmgrd.c
@@ -89,6 +89,8 @@ static void check_node_configuration(void);
|
|||||||
static void standby_monitor(void);
|
static void standby_monitor(void);
|
||||||
static void witness_monitor(void);
|
static void witness_monitor(void);
|
||||||
static bool check_connection(PGconn *conn, const char *type);
|
static bool check_connection(PGconn *conn, const char *type);
|
||||||
|
static bool set_local_node_failed(void);
|
||||||
|
|
||||||
static void update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_node_id);
|
static void update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_node_id);
|
||||||
static void update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
static void update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||||
|
|
||||||
@@ -647,6 +649,19 @@ standby_monitor(void)
|
|||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
t_node_info upstream_node;
|
t_node_info upstream_node;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Verify that the local node is still available - if not there's
|
||||||
|
* no point in doing much else anyway
|
||||||
|
*/
|
||||||
|
|
||||||
|
log_debug("checking local node connection...\n"); // ZZZ
|
||||||
|
if (!check_connection(my_local_conn, "standby"))
|
||||||
|
{
|
||||||
|
set_local_node_failed();
|
||||||
|
log_err(_("Failed to connect to local node, exiting!\n"));
|
||||||
|
terminate(1);
|
||||||
|
}
|
||||||
|
|
||||||
upstream_conn = get_upstream_connection(my_local_conn,
|
upstream_conn = get_upstream_connection(my_local_conn,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
@@ -664,11 +679,7 @@ standby_monitor(void)
|
|||||||
* local_options.reconnect_intv
|
* local_options.reconnect_intv
|
||||||
* l seconds */
|
* l seconds */
|
||||||
|
|
||||||
if (!check_connection(my_local_conn, "standby"))
|
|
||||||
{
|
|
||||||
log_err("Failed to connect to local node, exiting!\n");
|
|
||||||
terminate(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
@@ -771,6 +782,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
if (!check_connection(my_local_conn, "standby"))
|
if (!check_connection(my_local_conn, "standby"))
|
||||||
{
|
{
|
||||||
|
set_local_node_failed();
|
||||||
terminate(0);
|
terminate(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1394,6 +1406,7 @@ check_connection(PGconn *conn, const char *type)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!is_pgup(conn, local_options.master_response_timeout))
|
if (!is_pgup(conn, local_options.master_response_timeout))
|
||||||
{
|
{
|
||||||
log_err(_("%s: Unable to reconnect to %s after %i seconds...\n"),
|
log_err(_("%s: Unable to reconnect to %s after %i seconds...\n"),
|
||||||
@@ -1404,6 +1417,59 @@ check_connection(PGconn *conn, const char *type)
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set_local_node_failed()
|
||||||
|
*
|
||||||
|
* If failure of the local node is detected, attempt to connect
|
||||||
|
* to the current primary server (as stored in the global variable
|
||||||
|
* `primary_conn`) and update its record to failed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static bool
|
||||||
|
set_local_node_failed(void)
|
||||||
|
{
|
||||||
|
PGresult *res;
|
||||||
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
|
if (!check_connection(primary_conn, "master"))
|
||||||
|
{
|
||||||
|
log_err(_("Unable to connect to last known primary node\n"));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check that this node is still primary - it's just about conceivable
|
||||||
|
* that it might have become a standby of a new primary in the
|
||||||
|
* intervening period
|
||||||
|
*/
|
||||||
|
|
||||||
|
// ZZZ TODO
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Attempt to set own record as inactive
|
||||||
|
*/
|
||||||
|
sqlquery_snprintf(sqlquery,
|
||||||
|
"UPDATE %s.repl_nodes "
|
||||||
|
" SET active = FALSE "
|
||||||
|
" WHERE id = %i ",
|
||||||
|
get_repmgr_schema_quoted(primary_conn),
|
||||||
|
node_info.node_id);
|
||||||
|
|
||||||
|
res = PQexec(primary_conn, sqlquery);
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_err(_("Unable to set local node %i as inactive on primary: %s\n"),
|
||||||
|
node_info.node_id,
|
||||||
|
PQerrorMessage(primary_conn));
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("Local node %i marked as inactive on primary\n"), node_info.node_id);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user