mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
Change "primary" to "master"
Personally I prefer "primary", but repmgr uses "master" so let's consolidate on one version of the terminology for clarity.
This commit is contained in:
2
repmgr.h
2
repmgr.h
@@ -55,7 +55,7 @@
|
|||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
UNKNOWN = 0,
|
UNKNOWN = 0,
|
||||||
PRIMARY,
|
MASTER,
|
||||||
STANDBY,
|
STANDBY,
|
||||||
WITNESS
|
WITNESS
|
||||||
} t_server_type;
|
} t_server_type;
|
||||||
|
|||||||
332
repmgrd.c
332
repmgrd.c
@@ -63,10 +63,10 @@ typedef struct s_node_info
|
|||||||
t_configuration_options local_options;
|
t_configuration_options local_options;
|
||||||
PGconn *my_local_conn = NULL;
|
PGconn *my_local_conn = NULL;
|
||||||
|
|
||||||
/* Primary info */
|
/* Master info */
|
||||||
t_configuration_options primary_options;
|
t_configuration_options master_options;
|
||||||
|
|
||||||
PGconn *primary_conn = NULL;
|
PGconn *master_conn = NULL;
|
||||||
|
|
||||||
const char *progname;
|
const char *progname;
|
||||||
|
|
||||||
@@ -91,12 +91,12 @@ static void witness_monitor(void);
|
|||||||
static bool check_connection(PGconn *conn, const char *type);
|
static bool check_connection(PGconn *conn, const char *type);
|
||||||
static bool set_local_node_failed(void);
|
static bool set_local_node_failed(void);
|
||||||
|
|
||||||
static bool update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_node_id);
|
static bool update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id);
|
||||||
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
static bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id);
|
||||||
|
|
||||||
static void update_shared_memory(char *last_wal_standby_applied);
|
static void update_shared_memory(char *last_wal_standby_applied);
|
||||||
static void update_registration(void);
|
static void update_registration(void);
|
||||||
static void do_primary_failover(void);
|
static void do_master_failover(void);
|
||||||
static bool do_upstream_standby_failover(t_node_info upstream_node);
|
static bool do_upstream_standby_failover(t_node_info upstream_node);
|
||||||
|
|
||||||
static t_node_info get_node_info(PGconn *conn, char *cluster, int node_id);
|
static t_node_info get_node_info(PGconn *conn, char *cluster, int node_id);
|
||||||
@@ -124,16 +124,16 @@ static void check_and_create_pid_file(const char *pid_file);
|
|||||||
static void
|
static void
|
||||||
close_connections()
|
close_connections()
|
||||||
{
|
{
|
||||||
if (primary_conn != NULL && PQisBusy(primary_conn) == 1)
|
if (master_conn != NULL && PQisBusy(master_conn) == 1)
|
||||||
cancel_query(primary_conn, local_options.master_response_timeout);
|
cancel_query(master_conn, local_options.master_response_timeout);
|
||||||
|
|
||||||
if (my_local_conn != NULL)
|
if (my_local_conn != NULL)
|
||||||
PQfinish(my_local_conn);
|
PQfinish(my_local_conn);
|
||||||
|
|
||||||
if (primary_conn != NULL && primary_conn != my_local_conn)
|
if (master_conn != NULL && master_conn != my_local_conn)
|
||||||
PQfinish(primary_conn);
|
PQfinish(master_conn);
|
||||||
|
|
||||||
primary_conn = NULL;
|
master_conn = NULL;
|
||||||
my_local_conn = NULL;
|
my_local_conn = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -297,17 +297,17 @@ main(int argc, char **argv)
|
|||||||
do
|
do
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Set my server mode, establish a connection to primary and start
|
* Set my server mode, establish a connection to master and start
|
||||||
* monitor
|
* monitor
|
||||||
*/
|
*/
|
||||||
|
|
||||||
switch (node_info.type)
|
switch (node_info.type)
|
||||||
{
|
{
|
||||||
case PRIMARY:
|
case MASTER:
|
||||||
primary_options.node = local_options.node;
|
master_options.node = local_options.node;
|
||||||
strncpy(primary_options.conninfo, local_options.conninfo,
|
strncpy(master_options.conninfo, local_options.conninfo,
|
||||||
MAXLEN);
|
MAXLEN);
|
||||||
primary_conn = my_local_conn;
|
master_conn = my_local_conn;
|
||||||
|
|
||||||
check_cluster_configuration(my_local_conn);
|
check_cluster_configuration(my_local_conn);
|
||||||
check_node_configuration();
|
check_node_configuration();
|
||||||
@@ -316,14 +316,14 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
PQfinish(my_local_conn);
|
PQfinish(my_local_conn);
|
||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
primary_conn = my_local_conn;
|
master_conn = my_local_conn;
|
||||||
update_registration();
|
update_registration();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Log startup event */
|
/* Log startup event */
|
||||||
if(startup_event_logged == false)
|
if(startup_event_logged == false)
|
||||||
{
|
{
|
||||||
create_event_record(primary_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_start",
|
"repmgrd_start",
|
||||||
@@ -332,10 +332,10 @@ main(int argc, char **argv)
|
|||||||
startup_event_logged = true;
|
startup_event_logged = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_info(_("starting continuous primary connection check\n"));
|
log_info(_("starting continuous master connection check\n"));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that primary is still alive.
|
* Check that master is still alive.
|
||||||
* XXX We should also check that the
|
* XXX We should also check that the
|
||||||
* standby servers are sending info
|
* standby servers are sending info
|
||||||
*/
|
*/
|
||||||
@@ -346,7 +346,7 @@ main(int argc, char **argv)
|
|||||||
*/
|
*/
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (check_connection(primary_conn, "master"))
|
if (check_connection(master_conn, "master"))
|
||||||
{
|
{
|
||||||
sleep(local_options.monitor_interval_secs);
|
sleep(local_options.monitor_interval_secs);
|
||||||
}
|
}
|
||||||
@@ -368,7 +368,7 @@ main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
PQfinish(my_local_conn);
|
PQfinish(my_local_conn);
|
||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
primary_conn = my_local_conn;
|
master_conn = my_local_conn;
|
||||||
|
|
||||||
if (*local_options.logfile)
|
if (*local_options.logfile)
|
||||||
{
|
{
|
||||||
@@ -393,15 +393,15 @@ main(int argc, char **argv)
|
|||||||
case WITNESS:
|
case WITNESS:
|
||||||
case STANDBY:
|
case STANDBY:
|
||||||
|
|
||||||
/* We need the node id of the primary server as well as a connection to it */
|
/* We need the node id of the master server as well as a connection to it */
|
||||||
log_info(_("connecting to master node '%s'\n"),
|
log_info(_("connecting to master node '%s'\n"),
|
||||||
local_options.cluster_name);
|
local_options.cluster_name);
|
||||||
|
|
||||||
primary_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
&primary_options.node, NULL);
|
&master_options.node, NULL);
|
||||||
|
|
||||||
if (primary_conn == NULL)
|
if (master_conn == NULL)
|
||||||
{
|
{
|
||||||
PQExpBufferData errmsg;
|
PQExpBufferData errmsg;
|
||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
@@ -434,7 +434,7 @@ main(int argc, char **argv)
|
|||||||
/* Log startup event */
|
/* Log startup event */
|
||||||
if(startup_event_logged == false)
|
if(startup_event_logged == false)
|
||||||
{
|
{
|
||||||
create_event_record(primary_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_start",
|
"repmgrd_start",
|
||||||
@@ -512,8 +512,8 @@ main(int argc, char **argv)
|
|||||||
/*
|
/*
|
||||||
* witness_monitor()
|
* witness_monitor()
|
||||||
*
|
*
|
||||||
* Monitors witness server; attempt to find and connect to new primary
|
* Monitors witness server; attempt to find and connect to new master
|
||||||
* if existing primary connection is lost
|
* if existing master connection is lost
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
witness_monitor(void)
|
witness_monitor(void)
|
||||||
@@ -529,14 +529,14 @@ witness_monitor(void)
|
|||||||
* of a missing master and promotion of a standby by that standby's
|
* of a missing master and promotion of a standby by that standby's
|
||||||
* repmgrd, so we'll loop for a while before giving up.
|
* repmgrd, so we'll loop for a while before giving up.
|
||||||
*/
|
*/
|
||||||
connection_ok = check_connection(primary_conn, "master");
|
connection_ok = check_connection(master_conn, "master");
|
||||||
|
|
||||||
if(connection_ok == false)
|
if(connection_ok == false)
|
||||||
{
|
{
|
||||||
int connection_retries;
|
int connection_retries;
|
||||||
log_debug(_("old primary node ID: %i\n"), primary_options.node);
|
log_debug(_("old master node ID: %i\n"), master_options.node);
|
||||||
|
|
||||||
/* We need to wait a while for the new primary to be promoted */
|
/* We need to wait a while for the new master to be promoted */
|
||||||
log_info(
|
log_info(
|
||||||
_("waiting %i seconds for a new master to be promoted...\n"),
|
_("waiting %i seconds for a new master to be promoted...\n"),
|
||||||
local_options.master_response_timeout
|
local_options.master_response_timeout
|
||||||
@@ -552,31 +552,31 @@ witness_monitor(void)
|
|||||||
connection_retries + 1,
|
connection_retries + 1,
|
||||||
local_options.reconnect_attempts
|
local_options.reconnect_attempts
|
||||||
);
|
);
|
||||||
primary_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
local_options.cluster_name, &primary_options.node, NULL);
|
local_options.cluster_name, &master_options.node, NULL);
|
||||||
|
|
||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(master_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_warning(
|
log_warning(
|
||||||
_("unable to determine a valid master server; waiting %i seconds to retry...\n"),
|
_("unable to determine a valid master server; waiting %i seconds to retry...\n"),
|
||||||
local_options.reconnect_intvl
|
local_options.reconnect_intvl
|
||||||
);
|
);
|
||||||
PQfinish(primary_conn);
|
PQfinish(master_conn);
|
||||||
sleep(local_options.reconnect_intvl);
|
sleep(local_options.reconnect_intvl);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_debug(_("new master found with node ID: %i\n"), primary_options.node);
|
log_debug(_("new master found with node ID: %i\n"), master_options.node);
|
||||||
connection_ok = true;
|
connection_ok = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Update the repl_nodes table from the new primary to reflect the changed
|
* Update the repl_nodes table from the new master to reflect the changed
|
||||||
* node configuration
|
* node configuration
|
||||||
*
|
*
|
||||||
* XXX it would be neat to be able to handle this with e.g. table-based
|
* XXX it would be neat to be able to handle this with e.g. table-based
|
||||||
* logical replication
|
* logical replication
|
||||||
*/
|
*/
|
||||||
copy_configuration(primary_conn, my_local_conn, local_options.cluster_name);
|
copy_configuration(master_conn, my_local_conn, local_options.cluster_name);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -611,9 +611,9 @@ witness_monitor(void)
|
|||||||
* Cancel any query that is still being executed, so i can insert the
|
* Cancel any query that is still being executed, so i can insert the
|
||||||
* current record
|
* current record
|
||||||
*/
|
*/
|
||||||
if (!cancel_query(primary_conn, local_options.master_response_timeout))
|
if (!cancel_query(master_conn, local_options.master_response_timeout))
|
||||||
return;
|
return;
|
||||||
if (wait_connection_availability(primary_conn,
|
if (wait_connection_availability(master_conn,
|
||||||
local_options.master_response_timeout) != 1)
|
local_options.master_response_timeout) != 1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -633,20 +633,20 @@ witness_monitor(void)
|
|||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build the SQL to execute on primary
|
* Build the SQL to execute on master
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_monitor "
|
"INSERT INTO %s.repl_monitor "
|
||||||
" (primary_node, standby_node, "
|
" (master_node, standby_node, "
|
||||||
" last_monitor_time, last_apply_time, "
|
" last_monitor_time, last_apply_time, "
|
||||||
" last_wal_primary_location, last_wal_standby_location, "
|
" last_wal_master_location, last_wal_standby_location, "
|
||||||
" replication_lag, apply_lag )"
|
" replication_lag, apply_lag )"
|
||||||
" VALUES(%d, %d, "
|
" VALUES(%d, %d, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
" '%s'::TIMESTAMP WITH TIME ZONE, NULL, "
|
||||||
" pg_current_xlog_location(), NULL, "
|
" pg_current_xlog_location(), NULL, "
|
||||||
" 0, 0) ",
|
" 0, 0) ",
|
||||||
get_repmgr_schema_quoted(my_local_conn),
|
get_repmgr_schema_quoted(my_local_conn),
|
||||||
primary_options.node,
|
master_options.node,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
monitor_witness_timestamp);
|
monitor_witness_timestamp);
|
||||||
|
|
||||||
@@ -654,16 +654,16 @@ witness_monitor(void)
|
|||||||
* Execute the query asynchronously, but don't check for a result. We will
|
* Execute the query asynchronously, but don't check for a result. We will
|
||||||
* check the result next time we pause for a monitor step.
|
* check the result next time we pause for a monitor step.
|
||||||
*/
|
*/
|
||||||
if (PQsendQuery(primary_conn, sqlquery) == 0)
|
if (PQsendQuery(master_conn, sqlquery) == 0)
|
||||||
log_warning(_("query could not be sent to master: %s\n"),
|
log_warning(_("query could not be sent to master: %s\n"),
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ZZZ update description
|
// ZZZ update description
|
||||||
/*
|
/*
|
||||||
* Insert monitor info, this is basically the time and xlog replayed,
|
* Insert monitor info, this is basically the time and xlog replayed,
|
||||||
* applied on standby and current xlog location in primary.
|
* applied on standby and current xlog location in master.
|
||||||
* Also do the math to see how far are we in bytes for being up-to-date
|
* Also do the math to see how far are we in bytes for being up-to-date
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
@@ -671,13 +671,13 @@ standby_monitor(void)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char monitor_standby_timestamp[MAXLEN];
|
char monitor_standby_timestamp[MAXLEN];
|
||||||
char last_wal_primary_location[MAXLEN];
|
char last_wal_master_location[MAXLEN];
|
||||||
char last_wal_standby_received[MAXLEN];
|
char last_wal_standby_received[MAXLEN];
|
||||||
char last_wal_standby_applied[MAXLEN];
|
char last_wal_standby_applied[MAXLEN];
|
||||||
char last_wal_standby_applied_timestamp[MAXLEN];
|
char last_wal_standby_applied_timestamp[MAXLEN];
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
XLogRecPtr lsn_primary;
|
XLogRecPtr lsn_master;
|
||||||
XLogRecPtr lsn_standby_received;
|
XLogRecPtr lsn_standby_received;
|
||||||
XLogRecPtr lsn_standby_applied;
|
XLogRecPtr lsn_standby_applied;
|
||||||
|
|
||||||
@@ -689,7 +689,7 @@ standby_monitor(void)
|
|||||||
int upstream_node_id;
|
int upstream_node_id;
|
||||||
t_node_info upstream_node;
|
t_node_info upstream_node;
|
||||||
|
|
||||||
int active_primary_id;
|
int active_master_id;
|
||||||
const char *type = NULL;
|
const char *type = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -710,7 +710,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(primary_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -725,7 +725,7 @@ standby_monitor(void)
|
|||||||
local_options.node,
|
local_options.node,
|
||||||
&upstream_node_id, NULL);
|
&upstream_node_id, NULL);
|
||||||
|
|
||||||
type = upstream_node_id == primary_options.node
|
type = upstream_node_id == master_options.node
|
||||||
? "master"
|
? "master"
|
||||||
: "upstream";
|
: "upstream";
|
||||||
|
|
||||||
@@ -752,16 +752,16 @@ standby_monitor(void)
|
|||||||
|
|
||||||
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
for (connection_retries = 0; connection_retries < local_options.reconnect_attempts; connection_retries++)
|
||||||
{
|
{
|
||||||
primary_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
local_options.cluster_name, &primary_options.node, NULL);
|
local_options.cluster_name, &master_options.node, NULL);
|
||||||
if (PQstatus(primary_conn) == CONNECTION_OK)
|
if (PQstatus(master_conn) == CONNECTION_OK)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Connected, we can continue the process so break the
|
* Connected, we can continue the process so break the
|
||||||
* loop
|
* loop
|
||||||
*/
|
*/
|
||||||
log_err(_("connected to node %d, continuing monitoring.\n"),
|
log_err(_("connected to node %d, continuing monitoring.\n"),
|
||||||
primary_options.node);
|
master_options.node);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -775,7 +775,7 @@ standby_monitor(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (PQstatus(primary_conn) != CONNECTION_OK)
|
if (PQstatus(master_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
PQExpBufferData errmsg;
|
PQExpBufferData errmsg;
|
||||||
initPQExpBuffer(&errmsg);
|
initPQExpBuffer(&errmsg);
|
||||||
@@ -799,21 +799,21 @@ standby_monitor(void)
|
|||||||
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
else if (local_options.failover == AUTOMATIC_FAILOVER)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* When we returns from this function we will have a new primary
|
* When we returns from this function we will have a new master
|
||||||
* and a new primary_conn
|
* and a new master_conn
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Failover handling is handled differently depending on whether
|
* Failover handling is handled differently depending on whether
|
||||||
* the failed node is the primary or a cascading standby
|
* the failed node is the master or a cascading standby
|
||||||
*/
|
*/
|
||||||
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
upstream_node = get_node_info(my_local_conn, local_options.cluster_name, node_info.upstream_node_id);
|
||||||
|
|
||||||
if(upstream_node.type == PRIMARY)
|
if(upstream_node.type == MASTER)
|
||||||
{
|
{
|
||||||
log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
|
log_debug(_("failure detected on master node (%i); attempting to promote a standby\n"),
|
||||||
node_info.upstream_node_id);
|
node_info.upstream_node_id);
|
||||||
do_primary_failover();
|
do_master_failover();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -830,7 +830,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
create_event_record(primary_conn,
|
create_event_record(master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
local_options.node,
|
local_options.node,
|
||||||
"repmgrd_shutdown",
|
"repmgrd_shutdown",
|
||||||
@@ -893,7 +893,7 @@ standby_monitor(void)
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If original primary has gone away we'll need to get the new one
|
* If original master has gone away we'll need to get the new one
|
||||||
* from the upstream node to write monitoring information
|
* from the upstream node to write monitoring information
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -924,19 +924,19 @@ standby_monitor(void)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
active_primary_id = atoi(PQgetvalue(res, 0, 0));
|
active_master_id = atoi(PQgetvalue(res, 0, 0));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
if(active_primary_id != primary_options.node)
|
if(active_master_id != master_options.node)
|
||||||
{
|
{
|
||||||
log_notice(_("connecting to active master (node %i)...\n"), active_primary_id); \
|
log_notice(_("connecting to active master (node %i)...\n"), active_master_id); \
|
||||||
if(primary_conn != NULL)
|
if(master_conn != NULL)
|
||||||
{
|
{
|
||||||
PQfinish(primary_conn);
|
PQfinish(master_conn);
|
||||||
}
|
}
|
||||||
primary_conn = get_master_connection(my_local_conn,
|
master_conn = get_master_connection(my_local_conn,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
&primary_options.node, NULL);
|
&master_options.node, NULL);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -944,9 +944,9 @@ standby_monitor(void)
|
|||||||
* Cancel any query that is still being executed, so i can insert the
|
* Cancel any query that is still being executed, so i can insert the
|
||||||
* current record
|
* current record
|
||||||
*/
|
*/
|
||||||
if (!cancel_query(primary_conn, local_options.master_response_timeout))
|
if (!cancel_query(master_conn, local_options.master_response_timeout))
|
||||||
return;
|
return;
|
||||||
if (wait_connection_availability(primary_conn, local_options.master_response_timeout) != 1)
|
if (wait_connection_availability(master_conn, local_options.master_response_timeout) != 1)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Get local xlog info */
|
/* Get local xlog info */
|
||||||
@@ -969,43 +969,43 @@ standby_monitor(void)
|
|||||||
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
strncpy(last_wal_standby_applied_timestamp, PQgetvalue(res, 0, 3), MAXLEN);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Get primary xlog info */
|
/* Get master xlog info */
|
||||||
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
sqlquery_snprintf(sqlquery, "SELECT pg_current_xlog_location()");
|
||||||
|
|
||||||
res = PQexec(primary_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s\n"), PQerrorMessage(primary_conn));
|
log_err(_("PQexec failed: %s\n"), PQerrorMessage(master_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
strncpy(last_wal_primary_location, PQgetvalue(res, 0, 0), MAXLEN);
|
strncpy(last_wal_master_location, PQgetvalue(res, 0, 0), MAXLEN);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
/* Calculate the lag */
|
/* Calculate the lag */
|
||||||
lsn_primary = lsn_to_xlogrecptr(last_wal_primary_location, NULL);
|
lsn_master = lsn_to_xlogrecptr(last_wal_master_location, NULL);
|
||||||
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
|
lsn_standby_received = lsn_to_xlogrecptr(last_wal_standby_received, NULL);
|
||||||
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
|
lsn_standby_applied = lsn_to_xlogrecptr(last_wal_standby_applied, NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build the SQL to execute on primary
|
* Build the SQL to execute on master
|
||||||
*/
|
*/
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
"INSERT INTO %s.repl_monitor "
|
"INSERT INTO %s.repl_monitor "
|
||||||
" (primary_node, standby_node, "
|
" (master_node, standby_node, "
|
||||||
" last_monitor_time, last_apply_time, "
|
" last_monitor_time, last_apply_time, "
|
||||||
" last_wal_primary_location, last_wal_standby_location, "
|
" last_wal_master_location, last_wal_standby_location, "
|
||||||
" replication_lag, apply_lag ) "
|
" replication_lag, apply_lag ) "
|
||||||
" VALUES(%d, %d, "
|
" VALUES(%d, %d, "
|
||||||
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
" '%s'::TIMESTAMP WITH TIME ZONE, '%s'::TIMESTAMP WITH TIME ZONE, "
|
||||||
" '%s', '%s', "
|
" '%s', '%s', "
|
||||||
" %llu, %llu) ",
|
" %llu, %llu) ",
|
||||||
get_repmgr_schema_quoted(primary_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
primary_options.node, local_options.node,
|
master_options.node, local_options.node,
|
||||||
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
|
monitor_standby_timestamp, last_wal_standby_applied_timestamp,
|
||||||
last_wal_primary_location, last_wal_standby_received,
|
last_wal_master_location, last_wal_standby_received,
|
||||||
(long long unsigned int)(lsn_primary - lsn_standby_received),
|
(long long unsigned int)(lsn_master - lsn_standby_received),
|
||||||
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
|
(long long unsigned int)(lsn_standby_received - lsn_standby_applied));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1013,20 +1013,20 @@ standby_monitor(void)
|
|||||||
* check the result next time we pause for a monitor step.
|
* check the result next time we pause for a monitor step.
|
||||||
*/
|
*/
|
||||||
log_debug("standby_monitor: %s\n", sqlquery);
|
log_debug("standby_monitor: %s\n", sqlquery);
|
||||||
if (PQsendQuery(primary_conn, sqlquery) == 0)
|
if (PQsendQuery(master_conn, sqlquery) == 0)
|
||||||
log_warning(_("query could not be sent to primary. %s\n"),
|
log_warning(_("query could not be sent to master. %s\n"),
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* do_primary_failover()
|
* do_master_failover()
|
||||||
*
|
*
|
||||||
* Handles failover to new cluster primary
|
* Handles failover to new cluster master
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
do_primary_failover(void)
|
do_master_failover(void)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
@@ -1054,7 +1054,7 @@ do_primary_failover(void)
|
|||||||
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
t_node_info nodes[FAILOVER_NODES_MAX_CHECK];
|
||||||
|
|
||||||
/* Store details of the failed node here */
|
/* Store details of the failed node here */
|
||||||
t_node_info failed_primary = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info failed_master = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||||
|
|
||||||
/* Store details of the best candidate for promotion to master here */
|
/* Store details of the best candidate for promotion to master here */
|
||||||
t_node_info best_candidate = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
t_node_info best_candidate = {-1, NO_UPSTREAM_NODE, "", InvalidXLogRecPtr, UNKNOWN, false, false};
|
||||||
@@ -1100,11 +1100,11 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
/* Copy details of the failed node */
|
/* Copy details of the failed node */
|
||||||
/* XXX only node_id is actually used later */
|
/* XXX only node_id is actually used later */
|
||||||
if(nodes[i].type == PRIMARY)
|
if(nodes[i].type == MASTER)
|
||||||
{
|
{
|
||||||
failed_primary.node_id = nodes[i].node_id;
|
failed_master.node_id = nodes[i].node_id;
|
||||||
failed_primary.xlog_location = nodes[i].xlog_location;
|
failed_master.xlog_location = nodes[i].xlog_location;
|
||||||
failed_primary.is_ready = nodes[i].is_ready;
|
failed_master.is_ready = nodes[i].is_ready;
|
||||||
}
|
}
|
||||||
|
|
||||||
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
|
nodes[i].upstream_node_id = atoi(PQgetvalue(res, i, 3));
|
||||||
@@ -1150,7 +1150,7 @@ do_primary_failover(void)
|
|||||||
if (visible_nodes < (total_nodes / 2.0))
|
if (visible_nodes < (total_nodes / 2.0))
|
||||||
{
|
{
|
||||||
log_err(_("Unable to reach most of the nodes.\n"
|
log_err(_("Unable to reach most of the nodes.\n"
|
||||||
"Let the other standby servers decide which one will be the primary.\n"
|
"Let the other standby servers decide which one will be the master.\n"
|
||||||
"Manual action will be needed to re-add this node to the cluster.\n"));
|
"Manual action will be needed to re-add this node to the cluster.\n"));
|
||||||
terminate(ERR_FAILOVER_FAIL);
|
terminate(ERR_FAILOVER_FAIL);
|
||||||
}
|
}
|
||||||
@@ -1221,7 +1221,7 @@ do_primary_failover(void)
|
|||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not be "
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not be "
|
||||||
" considered as new primary and exit.\n"),
|
" considered as new master and exit.\n"),
|
||||||
PQerrorMessage(my_local_conn));
|
PQerrorMessage(my_local_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
sprintf(last_wal_standby_applied, "'%X/%X'", 0, 0);
|
||||||
@@ -1283,7 +1283,7 @@ do_primary_failover(void)
|
|||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("PQexec failed: %s.\nReport an invalid value to not"
|
log_err(_("PQexec failed: %s.\nReport an invalid value to not"
|
||||||
"be considered as new primary and exit.\n"),
|
"be considered as new master and exit.\n"),
|
||||||
PQerrorMessage(node_conn));
|
PQerrorMessage(node_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
PQfinish(node_conn);
|
PQfinish(node_conn);
|
||||||
@@ -1357,7 +1357,7 @@ do_primary_failover(void)
|
|||||||
my_local_conn = NULL;
|
my_local_conn = NULL;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* determine which one is the best candidate to promote to primary
|
* determine which one is the best candidate to promote to master
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < total_nodes; i++)
|
for (i = 0; i < total_nodes; i++)
|
||||||
{
|
{
|
||||||
@@ -1434,12 +1434,12 @@ do_primary_failover(void)
|
|||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
|
|
||||||
/* update node information to reflect new status */
|
/* update node information to reflect new status */
|
||||||
if(update_node_record_set_primary(my_local_conn, node_info.node_id, failed_primary.node_id) == false)
|
if(update_node_record_set_master(my_local_conn, node_info.node_id, failed_master.node_id) == false)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("unable to update node record for node %i (promoted to master following failure of node %i)"),
|
_("unable to update node record for node %i (promoted to master following failure of node %i)"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_primary.node_id);
|
failed_master.node_id);
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
@@ -1459,7 +1459,7 @@ do_primary_failover(void)
|
|||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("node %i promoted to master; old master %i marked as failed"),
|
_("node %i promoted to master; old master %i marked as failed"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
failed_primary.node_id);
|
failed_master.node_id);
|
||||||
|
|
||||||
/* my_local_conn is now the master */
|
/* my_local_conn is now the master */
|
||||||
create_event_record(my_local_conn,
|
create_event_record(my_local_conn,
|
||||||
@@ -1470,10 +1470,10 @@ do_primary_failover(void)
|
|||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
}
|
}
|
||||||
/* local node not promotion candidate - find the new primary */
|
/* local node not promotion candidate - find the new master */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PGconn *new_primary_conn;
|
PGconn *new_master_conn;
|
||||||
PQExpBufferData event_details;
|
PQExpBufferData event_details;
|
||||||
|
|
||||||
initPQExpBuffer(&event_details);
|
initPQExpBuffer(&event_details);
|
||||||
@@ -1481,12 +1481,12 @@ do_primary_failover(void)
|
|||||||
sleep(10);
|
sleep(10);
|
||||||
|
|
||||||
if (verbose)
|
if (verbose)
|
||||||
log_info(_("node %d is the best candidate to be the new primary, we should follow it...\n"),
|
log_info(_("node %d is the best candidate to be the new master, we should follow it...\n"),
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
|
log_debug(_("follow command is: \"%s\"\n"), local_options.follow_command);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The new primary may some time to be promoted. The follow command
|
* The new master may some time to be promoted. The follow command
|
||||||
* should take care of that.
|
* should take care of that.
|
||||||
*/
|
*/
|
||||||
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
if (log_type == REPMGR_STDERR && *local_options.logfile)
|
||||||
@@ -1496,30 +1496,30 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If 9.4 or later, and replication slots in use, we'll need to create a
|
* If 9.4 or later, and replication slots in use, we'll need to create a
|
||||||
* slot on the new primary
|
* slot on the new master
|
||||||
*/
|
*/
|
||||||
new_primary_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
new_master_conn = establish_db_connection(best_candidate.conninfo_str, true);
|
||||||
|
|
||||||
if(local_options.use_replication_slots)
|
if(local_options.use_replication_slots)
|
||||||
{
|
{
|
||||||
if(create_replication_slot(new_primary_conn, node_info.slot_name) == false)
|
if(create_replication_slot(new_master_conn, node_info.slot_name) == false)
|
||||||
{
|
{
|
||||||
|
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("Unable to create slot '%s' on the primary node: %s"),
|
_("Unable to create slot '%s' on the master node: %s"),
|
||||||
node_info.slot_name,
|
node_info.slot_name,
|
||||||
PQerrorMessage(new_primary_conn));
|
PQerrorMessage(new_master_conn));
|
||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_primary_conn,
|
create_event_record(new_master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
"repmgrd_failover_follow",
|
"repmgrd_failover_follow",
|
||||||
false,
|
false,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_master_conn);
|
||||||
terminate(ERR_DB_QUERY);
|
terminate(ERR_DB_QUERY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1535,7 +1535,7 @@ do_primary_failover(void)
|
|||||||
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
my_local_conn = establish_db_connection(local_options.conninfo, true);
|
||||||
|
|
||||||
/* update node information to reflect new status */
|
/* update node information to reflect new status */
|
||||||
if(update_node_record_set_upstream(new_primary_conn, node_info.node_id, best_candidate.node_id) == false)
|
if(update_node_record_set_upstream(new_master_conn, node_info.node_id, best_candidate.node_id) == false)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("Unable to update node record for node %i (following new upstream node %i)"),
|
_("Unable to update node record for node %i (following new upstream node %i)"),
|
||||||
@@ -1544,7 +1544,7 @@ do_primary_failover(void)
|
|||||||
|
|
||||||
log_err("%s\n", event_details.data);
|
log_err("%s\n", event_details.data);
|
||||||
|
|
||||||
create_event_record(new_primary_conn,
|
create_event_record(new_master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
"repmgrd_failover_follow",
|
"repmgrd_failover_follow",
|
||||||
@@ -1555,20 +1555,20 @@ do_primary_failover(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* update internal record for this node*/
|
/* update internal record for this node*/
|
||||||
node_info = get_node_info(new_primary_conn, local_options.cluster_name, local_options.node);
|
node_info = get_node_info(new_master_conn, local_options.cluster_name, local_options.node);
|
||||||
appendPQExpBuffer(&event_details,
|
appendPQExpBuffer(&event_details,
|
||||||
_("Node %i now following new upstream node %i"),
|
_("Node %i now following new upstream node %i"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
best_candidate.node_id);
|
best_candidate.node_id);
|
||||||
|
|
||||||
create_event_record(new_primary_conn,
|
create_event_record(new_master_conn,
|
||||||
&local_options,
|
&local_options,
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
"repmgrd_failover_follow",
|
"repmgrd_failover_follow",
|
||||||
true,
|
true,
|
||||||
event_details.data);
|
event_details.data);
|
||||||
|
|
||||||
PQfinish(new_primary_conn);
|
PQfinish(new_master_conn);
|
||||||
termPQExpBuffer(&event_details);
|
termPQExpBuffer(&event_details);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1600,10 +1600,10 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
node_info.node_id);
|
node_info.node_id);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Verify that we can still talk to the cluster primary even though
|
* Verify that we can still talk to the cluster master even though
|
||||||
* node upstream is not available
|
* node upstream is not available
|
||||||
*/
|
*/
|
||||||
if (!check_connection(primary_conn, "master"))
|
if (!check_connection(master_conn, "master"))
|
||||||
{
|
{
|
||||||
log_err(_("do_upstream_standby_failover(): Unable to connect to last known master node\n"));
|
log_err(_("do_upstream_standby_failover(): Unable to connect to last known master node\n"));
|
||||||
return false;
|
return false;
|
||||||
@@ -1615,14 +1615,14 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
"SELECT id, active, upstream_node_id, type, conninfo "
|
"SELECT id, active, upstream_node_id, type, conninfo "
|
||||||
" FROM %s.repl_nodes "
|
" FROM %s.repl_nodes "
|
||||||
" WHERE id = %i ",
|
" WHERE id = %i ",
|
||||||
get_repmgr_schema_quoted(primary_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
upstream_node_id);
|
upstream_node_id);
|
||||||
|
|
||||||
res = PQexec(primary_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to query cluster master: %s\n"), PQerrorMessage(primary_conn));
|
log_err(_("unable to query cluster master: %s\n"), PQerrorMessage(master_conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1638,11 +1638,11 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
if(strcmp(PQgetvalue(res, 0, 1), "f") == 0)
|
if(strcmp(PQgetvalue(res, 0, 1), "f") == 0)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Upstream node is an inactive primary, meaning no there are no direct
|
* Upstream node is an inactive master, meaning no there are no direct
|
||||||
* upstream nodes available to reattach to.
|
* upstream nodes available to reattach to.
|
||||||
*
|
*
|
||||||
* XXX For now we'll simply terminate, however it would make sense to
|
* XXX For now we'll simply terminate, however it would make sense to
|
||||||
* provide an option to either try and find the current primary and/or
|
* provide an option to either try and find the current master and/or
|
||||||
* a strategy to connect to a different upstream node
|
* a strategy to connect to a different upstream node
|
||||||
*/
|
*/
|
||||||
if(strcmp(PQgetvalue(res, 0, 4), "master") == 0)
|
if(strcmp(PQgetvalue(res, 0, 4), "master") == 0)
|
||||||
@@ -1679,7 +1679,7 @@ do_upstream_standby_failover(t_node_info upstream_node)
|
|||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(update_node_record_set_upstream(primary_conn, node_info.node_id, upstream_node_id) == false)
|
if(update_node_record_set_upstream(master_conn, node_info.node_id, upstream_node_id) == false)
|
||||||
{
|
{
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
@@ -1739,8 +1739,8 @@ check_connection(PGconn *conn, const char *type)
|
|||||||
* set_local_node_failed()
|
* set_local_node_failed()
|
||||||
*
|
*
|
||||||
* If failure of the local node is detected, attempt to connect
|
* If failure of the local node is detected, attempt to connect
|
||||||
* to the current primary server (as stored in the global variable
|
* to the current master server (as stored in the global variable
|
||||||
* `primary_conn`) and update its record to failed.
|
* `master_conn`) and update its record to failed.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
@@ -1748,19 +1748,19 @@ set_local_node_failed(void)
|
|||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
int active_primary_node_id = -1;
|
int active_master_node_id = -1;
|
||||||
char primary_conninfo[MAXLEN];
|
char master_conninfo[MAXLEN];
|
||||||
|
|
||||||
if (!check_connection(primary_conn, "master"))
|
if (!check_connection(master_conn, "master"))
|
||||||
{
|
{
|
||||||
log_err(_("set_local_node_failed(): Unable to connect to last known primary node\n"));
|
log_err(_("set_local_node_failed(): Unable to connect to last known master node\n"));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check that the node `primary_conn` is connected to is node is still
|
* Check that the node `master_conn` is connected to is node is still
|
||||||
* primary - it's just about conceivable that it might have become a
|
* master - it's just about conceivable that it might have become a
|
||||||
* standby of a new primary in the intervening period
|
* standby of a new master in the intervening period
|
||||||
*/
|
*/
|
||||||
|
|
||||||
sqlquery_snprintf(sqlquery,
|
sqlquery_snprintf(sqlquery,
|
||||||
@@ -1768,13 +1768,13 @@ set_local_node_failed(void)
|
|||||||
" FROM %s.repl_nodes "
|
" FROM %s.repl_nodes "
|
||||||
" WHERE type = 'master' "
|
" WHERE type = 'master' "
|
||||||
" AND active IS TRUE ",
|
" AND active IS TRUE ",
|
||||||
get_repmgr_schema_quoted(primary_conn));
|
get_repmgr_schema_quoted(master_conn));
|
||||||
|
|
||||||
res = PQexec(primary_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to obtain record for active master: %s\n"),
|
log_err(_("unable to obtain record for active master: %s\n"),
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1785,18 +1785,18 @@ set_local_node_failed(void)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
active_primary_node_id = atoi(PQgetvalue(res, 0, 0));
|
active_master_node_id = atoi(PQgetvalue(res, 0, 0));
|
||||||
strncpy(primary_conninfo, PQgetvalue(res, 0, 1), MAXLEN);
|
strncpy(master_conninfo, PQgetvalue(res, 0, 1), MAXLEN);
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
if(active_primary_node_id != primary_options.node)
|
if(active_master_node_id != master_options.node)
|
||||||
{
|
{
|
||||||
log_notice(_("current active master is %i; attempting to connect\n"),
|
log_notice(_("current active master is %i; attempting to connect\n"),
|
||||||
active_primary_node_id);
|
active_master_node_id);
|
||||||
PQfinish(primary_conn);
|
PQfinish(master_conn);
|
||||||
primary_conn = establish_db_connection(primary_conninfo, false);
|
master_conn = establish_db_connection(master_conninfo, false);
|
||||||
|
|
||||||
if(PQstatus(primary_conn) != CONNECTION_OK)
|
if(PQstatus(master_conn) != CONNECTION_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to connect to active master\n"));
|
log_err(_("unable to connect to active master\n"));
|
||||||
return false;
|
return false;
|
||||||
@@ -1813,20 +1813,20 @@ set_local_node_failed(void)
|
|||||||
"UPDATE %s.repl_nodes "
|
"UPDATE %s.repl_nodes "
|
||||||
" SET active = FALSE "
|
" SET active = FALSE "
|
||||||
" WHERE id = %i ",
|
" WHERE id = %i ",
|
||||||
get_repmgr_schema_quoted(primary_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
node_info.node_id);
|
node_info.node_id);
|
||||||
|
|
||||||
res = PQexec(primary_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("unable to set local node %i as inactive on primary: %s\n"),
|
log_err(_("unable to set local node %i as inactive on master: %s\n"),
|
||||||
node_info.node_id,
|
node_info.node_id,
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_notice(_("marking this node (%i) as inactive on primary\n"), node_info.node_id);
|
log_notice(_("marking this node (%i) as inactive on master\n"), node_info.node_id);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1852,11 +1852,11 @@ check_cluster_configuration(PGconn *conn)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there isn't any results then we have not configured a primary node
|
* If there isn't any results then we have not configured a master node
|
||||||
* yet in repmgr or the connection string is pointing to the wrong
|
* yet in repmgr or the connection string is pointing to the wrong
|
||||||
* database.
|
* database.
|
||||||
*
|
*
|
||||||
* XXX if we are the primary, should we try to create the tables needed?
|
* XXX if we are the master, should we try to create the tables needed?
|
||||||
*/
|
*/
|
||||||
if (PQntuples(res) == 0)
|
if (PQntuples(res) == 0)
|
||||||
{
|
{
|
||||||
@@ -1919,16 +1919,16 @@ check_node_configuration(void)
|
|||||||
"INSERT INTO %s.repl_nodes"
|
"INSERT INTO %s.repl_nodes"
|
||||||
" (id, cluster, name, conninfo, priority, witness) "
|
" (id, cluster, name, conninfo, priority, witness) "
|
||||||
" VALUES (%d, '%s', '%s', '%s', 0, FALSE) ",
|
" VALUES (%d, '%s', '%s', '%s', 0, FALSE) ",
|
||||||
get_repmgr_schema_quoted(primary_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
local_options.node,
|
local_options.node,
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
local_options.node_name,
|
local_options.node_name,
|
||||||
local_options.conninfo);
|
local_options.conninfo);
|
||||||
|
|
||||||
if (!PQexec(primary_conn, sqlquery))
|
if (!PQexec(master_conn, sqlquery))
|
||||||
{
|
{
|
||||||
log_err(_("unable to insert node details, %s\n"),
|
log_err(_("unable to insert node details, %s\n"),
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
terminate(ERR_BAD_CONFIG);
|
terminate(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2071,12 +2071,12 @@ update_registration(void)
|
|||||||
" SET conninfo = '%s', "
|
" SET conninfo = '%s', "
|
||||||
" priority = %d "
|
" priority = %d "
|
||||||
" WHERE id = %d ",
|
" WHERE id = %d ",
|
||||||
get_repmgr_schema_quoted(primary_conn),
|
get_repmgr_schema_quoted(master_conn),
|
||||||
local_options.conninfo,
|
local_options.conninfo,
|
||||||
local_options.priority,
|
local_options.priority,
|
||||||
local_options.node);
|
local_options.node);
|
||||||
|
|
||||||
res = PQexec(primary_conn, sqlquery);
|
res = PQexec(master_conn, sqlquery);
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
PQExpBufferData errmsg;
|
PQExpBufferData errmsg;
|
||||||
@@ -2084,7 +2084,7 @@ update_registration(void)
|
|||||||
|
|
||||||
appendPQExpBuffer(&errmsg,
|
appendPQExpBuffer(&errmsg,
|
||||||
_("unable to update registration: %s"),
|
_("unable to update registration: %s"),
|
||||||
PQerrorMessage(primary_conn));
|
PQerrorMessage(master_conn));
|
||||||
|
|
||||||
log_err("%s\n", errmsg.data);
|
log_err("%s\n", errmsg.data);
|
||||||
|
|
||||||
@@ -2297,7 +2297,7 @@ parse_node_type(const char *type)
|
|||||||
{
|
{
|
||||||
if(strcmp(type, "master") == 0)
|
if(strcmp(type, "master") == 0)
|
||||||
{
|
{
|
||||||
return PRIMARY;
|
return MASTER;
|
||||||
}
|
}
|
||||||
else if(strcmp(type, "standby") == 0)
|
else if(strcmp(type, "standby") == 0)
|
||||||
{
|
{
|
||||||
@@ -2313,12 +2313,12 @@ parse_node_type(const char *type)
|
|||||||
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_node_id)
|
update_node_record_set_master(PGconn *conn, int this_node_id, int old_master_node_id)
|
||||||
{
|
{
|
||||||
PGresult *res;
|
PGresult *res;
|
||||||
char sqlquery[QUERY_STR_LEN];
|
char sqlquery[QUERY_STR_LEN];
|
||||||
|
|
||||||
log_debug(_("Setting failed node %i inactive; marking node %i as primary\n"), old_primary_node_id, this_node_id);
|
log_debug(_("Setting failed node %i inactive; marking node %i as master\n"), old_master_node_id, this_node_id);
|
||||||
|
|
||||||
res = PQexec(conn, "BEGIN");
|
res = PQexec(conn, "BEGIN");
|
||||||
|
|
||||||
@@ -2340,14 +2340,14 @@ update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_n
|
|||||||
" AND id = %i ",
|
" AND id = %i ",
|
||||||
get_repmgr_schema_quoted(conn),
|
get_repmgr_schema_quoted(conn),
|
||||||
local_options.cluster_name,
|
local_options.cluster_name,
|
||||||
old_primary_node_id);
|
old_master_node_id);
|
||||||
|
|
||||||
res = PQexec(conn, sqlquery);
|
res = PQexec(conn, sqlquery);
|
||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to set old primary node %i as inactive: %s\n"),
|
log_err(_("Unable to set old master node %i as inactive: %s\n"),
|
||||||
old_primary_node_id,
|
old_master_node_id,
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|
||||||
@@ -2371,7 +2371,7 @@ update_node_record_set_primary(PGconn *conn, int this_node_id, int old_primary_n
|
|||||||
|
|
||||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
{
|
{
|
||||||
log_err(_("Unable to set current node %i as active primary: %s\n"),
|
log_err(_("Unable to set current node %i as active master: %s\n"),
|
||||||
this_node_id,
|
this_node_id,
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|||||||
Reference in New Issue
Block a user