mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
master register: check for existing master presence/active record
This commit is contained in:
238
dbutils.c
238
dbutils.c
@@ -360,6 +360,83 @@ conn_to_param_list(PGconn *conn, t_conninfo_param_list *param_list)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* ===================== */
|
||||
/* transaction functions */
|
||||
/* ===================== */
|
||||
|
||||
bool
|
||||
begin_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
log_verbose(LOG_DEBUG, "begin_transaction()");
|
||||
|
||||
res = PQexec(conn, "BEGIN");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("Unable to begin transaction:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
|
||||
PQclear(res);
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
commit_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
log_verbose(LOG_DEBUG, "commit_transaction()");
|
||||
|
||||
res = PQexec(conn, "COMMIT");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("Unable to commit transaction:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
rollback_transaction(PGconn *conn)
|
||||
{
|
||||
PGresult *res;
|
||||
|
||||
log_verbose(LOG_DEBUG, "rollback_transaction()");
|
||||
|
||||
res = PQexec(conn, "ROLLBACK");
|
||||
|
||||
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||
{
|
||||
log_error(_("Unable to rollback transaction:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* ========================== */
|
||||
/* GUC manipulation functions */
|
||||
/* ========================== */
|
||||
@@ -455,7 +532,7 @@ is_standby(PGconn *conn)
|
||||
|
||||
if (res == NULL || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to determine if server is in recovery: %s"),
|
||||
log_error(_("unable to determine if server is in recovery:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
result = -1;
|
||||
}
|
||||
@@ -467,3 +544,162 @@ is_standby(PGconn *conn)
|
||||
PQclear(res);
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the node list from the provided connection and attempt to connect to each node
|
||||
* in turn to definitely establish if it's the cluster primary.
|
||||
*
|
||||
* The node list is returned in the order which makes it likely that the
|
||||
* current primary will be returned first, reducing the number of speculative
|
||||
* connections which need to be made to other nodes.
|
||||
*
|
||||
* If master_conninfo_out points to allocated memory of MAXCONNINFO in length,
|
||||
* the primary server's conninfo string will be copied there.
|
||||
*/
|
||||
|
||||
PGconn *
|
||||
get_master_connection(PGconn *conn,
|
||||
int *master_id, char *master_conninfo_out)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
|
||||
PGconn *remote_conn = NULL;
|
||||
PGresult *res;
|
||||
|
||||
char remote_conninfo_stack[MAXCONNINFO];
|
||||
char *remote_conninfo = &*remote_conninfo_stack;
|
||||
|
||||
int i,
|
||||
node_id;
|
||||
|
||||
/*
|
||||
* If the caller wanted to get a copy of the connection info string, sub
|
||||
* out the local stack pointer for the pointer passed by the caller.
|
||||
*/
|
||||
if (master_conninfo_out != NULL)
|
||||
remote_conninfo = master_conninfo_out;
|
||||
|
||||
if (master_id != NULL)
|
||||
{
|
||||
*master_id = NODE_NOT_FOUND;
|
||||
}
|
||||
|
||||
/* find all registered nodes */
|
||||
log_info(_("retrieving node list"));
|
||||
initPQExpBuffer(&query);
|
||||
appendPQExpBuffer(&query,
|
||||
" SELECT node_id, conninfo, "
|
||||
" CASE WHEN type = 'master' THEN 1 ELSE 2 END AS type_priority"
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE type != 'witness' "
|
||||
"ORDER BY active DESC, type_priority, priority, node_id");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_master_connection():\n%s", query.data);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("unable to retrieve node records:\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
for (i = 0; i < PQntuples(res); i++)
|
||||
{
|
||||
int is_node_standby;
|
||||
|
||||
/* initialize with the values of the current node being processed */
|
||||
node_id = atoi(PQgetvalue(res, i, 0));
|
||||
strncpy(remote_conninfo, PQgetvalue(res, i, 1), MAXCONNINFO);
|
||||
log_verbose(LOG_INFO,
|
||||
_("checking role of cluster node '%i'"),
|
||||
node_id);
|
||||
remote_conn = establish_db_connection(remote_conninfo, false);
|
||||
|
||||
if (PQstatus(remote_conn) != CONNECTION_OK)
|
||||
continue;
|
||||
|
||||
is_node_standby = is_standby(remote_conn);
|
||||
|
||||
if (is_node_standby == -1)
|
||||
{
|
||||
log_error(_("unable to retrieve recovery state from node %i:\n %s"),
|
||||
node_id,
|
||||
PQerrorMessage(remote_conn));
|
||||
PQfinish(remote_conn);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* if is_standby() returns 0, queried node is the master */
|
||||
if (is_node_standby == 0)
|
||||
{
|
||||
PQclear(res);
|
||||
log_debug(_("get_master_connection(): current master node is %i"), node_id);
|
||||
|
||||
if (master_id != NULL)
|
||||
{
|
||||
*master_id = node_id;
|
||||
}
|
||||
|
||||
return remote_conn;
|
||||
}
|
||||
|
||||
PQfinish(remote_conn);
|
||||
}
|
||||
|
||||
PQclear(res);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Return the id of the active master node, or NODE_NOT_FOUND if no
|
||||
* record available.
|
||||
*
|
||||
* This reports the value stored in the database only and
|
||||
* does not verify whether the node is actually available
|
||||
*/
|
||||
int
|
||||
get_master_node_id(PGconn *conn)
|
||||
{
|
||||
PQExpBufferData query;
|
||||
PGresult *res;
|
||||
int retval;
|
||||
|
||||
initPQExpBuffer(&query);
|
||||
appendPQExpBuffer(&query,
|
||||
"SELECT node_id "
|
||||
" FROM repmgr.nodes "
|
||||
" WHERE type = 'master' "
|
||||
" AND active IS TRUE ");
|
||||
|
||||
log_verbose(LOG_DEBUG, "get_master_node_id():\n%s", query.data);
|
||||
|
||||
res = PQexec(conn, query.data);
|
||||
|
||||
termPQExpBuffer(&query);
|
||||
|
||||
if (PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||
{
|
||||
log_error(_("get_master_node_id(): query failed\n %s"),
|
||||
PQerrorMessage(conn));
|
||||
retval = NODE_NOT_FOUND;
|
||||
}
|
||||
else if (PQntuples(res) == 0)
|
||||
{
|
||||
log_verbose(LOG_WARNING, _("get_master_node_id(): no active primary found\n"));
|
||||
retval = NODE_NOT_FOUND;
|
||||
}
|
||||
else
|
||||
{
|
||||
retval = atoi(PQgetvalue(res, 0, 0));
|
||||
}
|
||||
PQclear(res);
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
21
dbutils.h
21
dbutils.h
@@ -119,12 +119,18 @@ PGconn *establish_db_connection_by_params(const char *keywords[],
|
||||
|
||||
|
||||
/* conninfo manipulation functions */
|
||||
void initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults);
|
||||
void copy_conninfo_params(t_conninfo_param_list *dest_list, t_conninfo_param_list *source_list);
|
||||
void conn_to_param_list(PGconn *conn, t_conninfo_param_list *param_list);
|
||||
void param_set(t_conninfo_param_list *param_list, const char *param, const char *value);
|
||||
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char *errmsg, bool ignore_application_name);
|
||||
void initialize_conninfo_params(t_conninfo_param_list *param_list, bool set_defaults);
|
||||
void copy_conninfo_params(t_conninfo_param_list *dest_list, t_conninfo_param_list *source_list);
|
||||
void conn_to_param_list(PGconn *conn, t_conninfo_param_list *param_list);
|
||||
void param_set(t_conninfo_param_list *param_list, const char *param, const char *value);
|
||||
char *param_get(t_conninfo_param_list *param_list, const char *param);
|
||||
bool parse_conninfo_string(const char *conninfo_str, t_conninfo_param_list *param_list, char *errmsg, bool ignore_application_name);
|
||||
|
||||
/* transaction functions */
|
||||
bool begin_transaction(PGconn *conn);
|
||||
bool commit_transaction(PGconn *conn);
|
||||
bool rollback_transaction(PGconn *conn);
|
||||
bool check_cluster_schema(PGconn *conn);
|
||||
|
||||
/* GUC manipulation functions */
|
||||
bool set_config(PGconn *conn, const char *config_param, const char *config_value);
|
||||
@@ -133,6 +139,7 @@ bool set_config_bool(PGconn *conn, const char *config_param, bool state);
|
||||
/* Server information functions */
|
||||
int get_server_version(PGconn *conn, char *server_version);
|
||||
int is_standby(PGconn *conn);
|
||||
|
||||
PGconn *get_master_connection(PGconn *standby_conn, int *master_id, char *master_conninfo_out);
|
||||
int get_master_node_id(PGconn *conn);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -436,8 +436,8 @@ static void
|
||||
do_master_register(void)
|
||||
{
|
||||
PGconn *conn = NULL;
|
||||
|
||||
|
||||
PGconn *master_conn = NULL;
|
||||
int current_master_id = UNKNOWN_NODE_ID;
|
||||
int ret;
|
||||
|
||||
log_info(_("connecting to master database..."));
|
||||
@@ -468,6 +468,44 @@ do_master_register(void)
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Ensure there isn't another active master already registered */
|
||||
master_conn = get_master_connection(conn, ¤t_master_id, NULL);
|
||||
|
||||
if (master_conn != NULL)
|
||||
{
|
||||
if (current_master_id != config_file_options.node_id)
|
||||
{
|
||||
/* it's impossible to add a second master to a streaming replication cluster */
|
||||
log_error(_("there is already an active registered master (node ID: %i) in this cluster"), current_master_id);
|
||||
PQfinish(master_conn);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* we've probably connected to ourselves */
|
||||
PQfinish(master_conn);
|
||||
}
|
||||
|
||||
|
||||
begin_transaction(conn);
|
||||
|
||||
/*
|
||||
* Check if a node with a different ID is registered as master. This shouldn't
|
||||
* happen but could do if an existing master was shut down without being
|
||||
* unregistered.
|
||||
*/
|
||||
|
||||
current_master_id = get_master_node_id(conn);
|
||||
if (current_master_id != NODE_NOT_FOUND && current_master_id != config_file_options.node_id)
|
||||
{
|
||||
log_error(_("another node with id %i is already registered as master"), current_master_id);
|
||||
// attempt to connect, add info/hint depending if active...
|
||||
log_info(_("a streaming replication cluster can have only one master node"));
|
||||
rollback_transaction(conn);
|
||||
PQfinish(conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user