mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
repmgr standby follow: main code
This commit is contained in:
47
dbutils.c
47
dbutils.c
@@ -1545,6 +1545,49 @@ update_node_record_set_master(PGconn *conn, int this_node_id)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update node record following change of status
|
||||||
|
* (e.g. inactive primary converted to standby)
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active)
|
||||||
|
{
|
||||||
|
PQExpBufferData query;
|
||||||
|
PGresult *res;
|
||||||
|
|
||||||
|
initPQExpBuffer(&query);
|
||||||
|
|
||||||
|
appendPQExpBuffer(&query,
|
||||||
|
" UPDATE repmgr.nodes "
|
||||||
|
" SET type = '%s', "
|
||||||
|
" upstream_node_id = %i, "
|
||||||
|
" active = %s "
|
||||||
|
" WHERE node_id = %i ",
|
||||||
|
type,
|
||||||
|
upstream_node_id,
|
||||||
|
active ? "TRUE" : "FALSE",
|
||||||
|
this_node_id);
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "update_node_record_status():\n %s", query.data);
|
||||||
|
|
||||||
|
res = PQexec(conn, query.data);
|
||||||
|
termPQExpBuffer(&query);
|
||||||
|
|
||||||
|
if (PQresultStatus(res) != PGRES_COMMAND_OK)
|
||||||
|
{
|
||||||
|
log_error(_("unable to update node record:\n %s"),
|
||||||
|
PQerrorMessage(conn));
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
PQclear(res);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
delete_node_record(PGconn *conn, int node)
|
delete_node_record(PGconn *conn, int node)
|
||||||
@@ -1906,7 +1949,7 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
|||||||
slot_name);
|
slot_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
log_debug(_("create_replication_slot(): Creating slot '%s' on upstream"), slot_name);
|
log_debug(_("create_replication_slot(): creating slot '%s' on upstream"), slot_name);
|
||||||
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s", query.data);
|
log_verbose(LOG_DEBUG, "create_replication_slot():\n%s", query.data);
|
||||||
|
|
||||||
res = PQexec(conn, query.data);
|
res = PQexec(conn, query.data);
|
||||||
@@ -1915,7 +1958,7 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
|||||||
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
if (!res || PQresultStatus(res) != PGRES_TUPLES_OK)
|
||||||
{
|
{
|
||||||
appendPQExpBuffer(error_msg,
|
appendPQExpBuffer(error_msg,
|
||||||
_("unable to create slot '%s' on the master node: %s\n"),
|
_("unable to create slot '%s' on the upstream node: %s\n"),
|
||||||
slot_name,
|
slot_name,
|
||||||
PQerrorMessage(conn));
|
PQerrorMessage(conn));
|
||||||
PQclear(res);
|
PQclear(res);
|
||||||
|
|||||||
@@ -208,6 +208,7 @@ bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_in
|
|||||||
bool delete_node_record(PGconn *conn, int node);
|
bool delete_node_record(PGconn *conn, int node);
|
||||||
|
|
||||||
bool update_node_record_set_master(PGconn *conn, int this_node_id);
|
bool update_node_record_set_master(PGconn *conn, int this_node_id);
|
||||||
|
bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active);
|
||||||
|
|
||||||
/* event record functions */
|
/* event record functions */
|
||||||
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
|
||||||
|
|||||||
@@ -73,6 +73,8 @@ static void copy_configuration_files(void);
|
|||||||
static int run_basebackup(void);
|
static int run_basebackup(void);
|
||||||
static int run_file_backup(void);
|
static int run_file_backup(void);
|
||||||
|
|
||||||
|
static void drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name);
|
||||||
|
|
||||||
static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);
|
static void tablespace_data_append(TablespaceDataList *list, const char *name, const char *oid, const char *location);
|
||||||
|
|
||||||
static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
|
static void get_barman_property(char *dst, char *name, char *local_repmgr_directory);
|
||||||
@@ -1225,7 +1227,20 @@ void
|
|||||||
do_standby_follow(void)
|
do_standby_follow(void)
|
||||||
{
|
{
|
||||||
PGconn *local_conn;
|
PGconn *local_conn;
|
||||||
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
int original_upstream_node_id = UNKNOWN_NODE_ID;
|
||||||
|
|
||||||
|
PGconn *master_conn = NULL;
|
||||||
|
int master_id = UNKNOWN_NODE_ID;
|
||||||
|
t_node_info master_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
|
||||||
char data_dir[MAXPGPATH];
|
char data_dir[MAXPGPATH];
|
||||||
|
t_conninfo_param_list recovery_conninfo;
|
||||||
|
char *errmsg = NULL;
|
||||||
|
int query_result;
|
||||||
|
char restart_command[MAXLEN];
|
||||||
|
int r;
|
||||||
|
|
||||||
|
|
||||||
log_verbose(LOG_DEBUG, "do_standby_follow()");
|
log_verbose(LOG_DEBUG, "do_standby_follow()");
|
||||||
|
|
||||||
@@ -1237,9 +1252,6 @@ do_standby_follow(void)
|
|||||||
if (runtime_options.host_param_provided == false)
|
if (runtime_options.host_param_provided == false)
|
||||||
{
|
{
|
||||||
bool success;
|
bool success;
|
||||||
PGconn *master_conn = NULL;
|
|
||||||
char master_conninfo[MAXLEN];
|
|
||||||
int master_id = UNKNOWN_NODE_ID;
|
|
||||||
int timer;
|
int timer;
|
||||||
|
|
||||||
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
@@ -1268,7 +1280,7 @@ do_standby_follow(void)
|
|||||||
{
|
{
|
||||||
master_conn = get_master_connection(local_conn,
|
master_conn = get_master_connection(local_conn,
|
||||||
&master_id,
|
&master_id,
|
||||||
(char *) &master_conninfo);
|
NULL);
|
||||||
|
|
||||||
if (PQstatus(master_conn) == CONNECTION_OK || runtime_options.wait == false)
|
if (PQstatus(master_conn) == CONNECTION_OK || runtime_options.wait == false)
|
||||||
{
|
{
|
||||||
@@ -1283,8 +1295,250 @@ do_standby_follow(void)
|
|||||||
exit(ERR_BAD_CONFIG);
|
exit(ERR_BAD_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("OK");
|
check_master_standby_version_match(local_conn, master_conn);
|
||||||
|
|
||||||
|
PQfinish(local_conn);
|
||||||
}
|
}
|
||||||
|
/* local data directory and primary server info explicitly provided -
|
||||||
|
* attempt to connect to that
|
||||||
|
*
|
||||||
|
* XXX --wait option won't be effective here
|
||||||
|
*/
|
||||||
|
else
|
||||||
|
{
|
||||||
|
master_conn = establish_db_connection_by_params(
|
||||||
|
(const char**)source_conninfo.keywords,
|
||||||
|
(const char**)source_conninfo.values,
|
||||||
|
true);
|
||||||
|
|
||||||
|
master_id = get_master_node_id(master_conn);
|
||||||
|
strncpy(data_dir, runtime_options.data_dir, MAXPGPATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (get_recovery_type(master_conn) != RECTYPE_MASTER)
|
||||||
|
{
|
||||||
|
log_error(_("the node to follow is not a master"));
|
||||||
|
// XXX log detail
|
||||||
|
PQfinish(master_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If 9.4 or later, and replication slots in use, we'll need to create a
|
||||||
|
* slot on the new master
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (config_file_options.use_replication_slots)
|
||||||
|
{
|
||||||
|
int server_version_num = get_server_version(master_conn, NULL);
|
||||||
|
|
||||||
|
PQExpBufferData event_details;
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
if (create_replication_slot(master_conn, repmgr_slot_name, server_version_num, &event_details) == false)
|
||||||
|
{
|
||||||
|
log_error("%s", event_details.data);
|
||||||
|
|
||||||
|
create_event_record(master_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"standby_follow",
|
||||||
|
false,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
PQfinish(master_conn);
|
||||||
|
exit(ERR_DB_QUERY);
|
||||||
|
}
|
||||||
|
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
}
|
||||||
|
|
||||||
|
get_node_record(master_conn, master_id, &master_node_record);
|
||||||
|
|
||||||
|
/* Initialise connection parameters to write as `primary_conninfo` */
|
||||||
|
initialize_conninfo_params(&recovery_conninfo, false);
|
||||||
|
|
||||||
|
/* We ignore any application_name set in the master's conninfo */
|
||||||
|
parse_conninfo_string(master_node_record.conninfo, &recovery_conninfo, errmsg, true);
|
||||||
|
|
||||||
|
|
||||||
|
/* Set the default application name to this node's name */
|
||||||
|
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
||||||
|
|
||||||
|
/* Set the replication user from the master node record */
|
||||||
|
param_set(&recovery_conninfo, "user", master_node_record.repluser);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fetch our node record so we can write application_name, if set,
|
||||||
|
* and to get the upstream node ID, which we'll need to know if
|
||||||
|
* replication slots are in use and we want to delete the old slot.
|
||||||
|
*/
|
||||||
|
query_result = get_node_record(master_conn,
|
||||||
|
config_file_options.node_id,
|
||||||
|
&local_node_record);
|
||||||
|
|
||||||
|
if (query_result != 1)
|
||||||
|
{
|
||||||
|
/* this shouldn't happen, but if it does we'll plough on regardless */
|
||||||
|
log_warning(_("unable to retrieve record for node %i"),
|
||||||
|
config_file_options.node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
t_conninfo_param_list local_node_conninfo;
|
||||||
|
bool parse_success;
|
||||||
|
|
||||||
|
initialize_conninfo_params(&local_node_conninfo, false);
|
||||||
|
|
||||||
|
parse_success = parse_conninfo_string(local_node_record.conninfo, &local_node_conninfo, errmsg, false);
|
||||||
|
|
||||||
|
if (parse_success == false)
|
||||||
|
{
|
||||||
|
/* this shouldn't happen, but if it does we'll plough on regardless */
|
||||||
|
log_warning(_("unable to parse conninfo string \"%s\":\n %s"),
|
||||||
|
local_node_record.conninfo, errmsg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char *application_name = param_get(&local_node_conninfo, "application_name");
|
||||||
|
|
||||||
|
if (application_name != NULL && strlen(application_name))
|
||||||
|
param_set(&recovery_conninfo, "application_name", application_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* store the original upstream node id so we can delete the replication slot,
|
||||||
|
* if exists
|
||||||
|
*/
|
||||||
|
if (local_node_record.upstream_node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
original_upstream_node_id = local_node_record.upstream_node_id;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
original_upstream_node_id = master_id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
char *list = param_list_to_string(&recovery_conninfo);
|
||||||
|
printf("recovery: %s\n", list);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_info(_("changing standby's master to node %i"), master_id);
|
||||||
|
|
||||||
|
if (!create_recovery_file(data_dir, &recovery_conninfo))
|
||||||
|
{
|
||||||
|
PQfinish(master_conn);
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* restart the service */
|
||||||
|
|
||||||
|
// XXX here check if service is running!! if not, start
|
||||||
|
// ensure that problem with pg_ctl output is caught here
|
||||||
|
if (*config_file_options.service_restart_command)
|
||||||
|
{
|
||||||
|
maxlen_snprintf(restart_command, "%s", config_file_options.service_restart_command);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
maxlen_snprintf(restart_command,
|
||||||
|
"%s %s -w -D %s -m fast restart",
|
||||||
|
make_pg_path("pg_ctl"),
|
||||||
|
config_file_options.pg_ctl_options,
|
||||||
|
data_dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
log_notice(_("restarting server using '%s'"),
|
||||||
|
restart_command);
|
||||||
|
|
||||||
|
r = system(restart_command);
|
||||||
|
if (r != 0)
|
||||||
|
{
|
||||||
|
log_error(_("unable to restart server"));
|
||||||
|
PQfinish(master_conn);
|
||||||
|
exit(ERR_NO_RESTART);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If replication slots are in use, and an inactive one for this node
|
||||||
|
* exists on the former upstream, drop it.
|
||||||
|
*
|
||||||
|
* XXX check if former upstream is current master?
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (config_file_options.use_replication_slots && runtime_options.host_param_provided == false && original_upstream_node_id != UNKNOWN_NODE_ID)
|
||||||
|
{
|
||||||
|
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||||
|
int upstream_query_result;
|
||||||
|
|
||||||
|
log_verbose(LOG_INFO, "attempting to remove replication slot from old upstream node %i",
|
||||||
|
original_upstream_node_id);
|
||||||
|
|
||||||
|
/* XXX should we poll for server restart? */
|
||||||
|
local_conn = establish_db_connection(config_file_options.conninfo, true);
|
||||||
|
|
||||||
|
upstream_query_result = get_node_record(local_conn,
|
||||||
|
original_upstream_node_id,
|
||||||
|
&upstream_node_record);
|
||||||
|
|
||||||
|
PQfinish(local_conn);
|
||||||
|
|
||||||
|
|
||||||
|
if (upstream_query_result != 1)
|
||||||
|
{
|
||||||
|
log_warning(_("unable to retrieve node record for old upstream node %i"),
|
||||||
|
original_upstream_node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PGconn *old_upstream_conn = establish_db_connection_quiet(upstream_node_record.conninfo);
|
||||||
|
|
||||||
|
if (PQstatus(old_upstream_conn) != CONNECTION_OK)
|
||||||
|
{
|
||||||
|
log_info(_("unable to connect to old upstream node %i to remove replication slot"),
|
||||||
|
original_upstream_node_id);
|
||||||
|
log_hint(_("if reusing this node, you should manually remove any inactive replication slots"));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
drop_replication_slot_if_exists(old_upstream_conn,
|
||||||
|
original_upstream_node_id,
|
||||||
|
local_node_record.slot_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's possible this node was an inactive primary - update the
|
||||||
|
* relevant fields to ensure it's marked as an active standby
|
||||||
|
*/
|
||||||
|
if (update_node_record_status(master_conn,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"standby",
|
||||||
|
master_id,
|
||||||
|
true) == false)
|
||||||
|
{
|
||||||
|
log_error(_("unable to update upstream node"));
|
||||||
|
PQfinish(master_conn);
|
||||||
|
|
||||||
|
exit(ERR_BAD_CONFIG);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_notice(_("STANDBY FOLLOW successful"));
|
||||||
|
|
||||||
|
create_event_record(master_conn,
|
||||||
|
&config_file_options,
|
||||||
|
config_file_options.node_id,
|
||||||
|
"standby_follow",
|
||||||
|
true,
|
||||||
|
NULL);
|
||||||
|
|
||||||
|
PQfinish(master_conn);
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2748,3 +3002,33 @@ check_recovery_type(PGconn *conn)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slot_name)
|
||||||
|
{
|
||||||
|
t_replication_slot slot_info;
|
||||||
|
int query_res;
|
||||||
|
|
||||||
|
query_res = get_slot_record(conn,slot_name, &slot_info);
|
||||||
|
|
||||||
|
if (query_res)
|
||||||
|
{
|
||||||
|
if (slot_info.active == false)
|
||||||
|
{
|
||||||
|
if (drop_replication_slot(conn, slot_name) == true)
|
||||||
|
{
|
||||||
|
log_notice(_("replication slot \"%s\" deleted on node %i"), slot_name, node_id);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_error(_("unable to delete replication slot \"%s\" on node %i"), slot_name, node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* if active replication slot exists, call Houston as we have a problem */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_warning(_("replication slot \"%s\" is still active on node %i"), slot_name, node_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user