mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
"standby clone": add --recovery-conf-only option
This generates "recovery.conf" for an existing standby. A typical use case is a standby cloned manually from an external data source (e.g. Barman), where "recovery.conf" needs to be created (along with a replication slot, if required). The --dry-run option checks the prerequisites but does not actually create "recovery.conf" or a replication slot. The operation requires that the upstream node is running, that a replication connection can be made and, if required, that a replication slot can be created. Implements GitHub #382.
This commit is contained in:
@@ -3486,6 +3486,9 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
|
||||
PGresult *res = NULL;
|
||||
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||
|
||||
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
||||
server_version_num = get_server_version(conn, NULL);
|
||||
|
||||
/*
|
||||
* Check whether slot exists already; if it exists and is active, that
|
||||
* means another active standby is using it, which creates an error
|
||||
|
||||
@@ -73,6 +73,7 @@ static char datadir_list_filename[MAXLEN];
|
||||
static char barman_command_buf[MAXLEN] = "";
|
||||
|
||||
static void _do_standby_promote_internal(PGconn *conn, const char *data_dir);
|
||||
static void _do_create_recovery_conf(void);
|
||||
|
||||
static void check_barman_config(void);
|
||||
static void check_source_server(void);
|
||||
@@ -119,6 +120,7 @@ static ConnectionStatus parse_remote_node_replication_connection(const char *nod
|
||||
* --recovery-min-apply-delay
|
||||
* --replication-user (only required if no upstream record)
|
||||
* --without-barman
|
||||
* --recovery-conf-only
|
||||
*/
|
||||
|
||||
void
|
||||
@@ -130,6 +132,14 @@ do_standby_clone(void)
|
||||
/* dummy node record */
|
||||
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
/*
|
||||
* --recovery-conf-only provided - we'll handle that separately
|
||||
*/
|
||||
if (runtime_options.recovery_conf_only == true)
|
||||
{
|
||||
return _do_create_recovery_conf();
|
||||
}
|
||||
|
||||
/*
|
||||
* conninfo params for the actual upstream node (which might be different
|
||||
* to the node we're cloning from) to write to recovery.conf
|
||||
@@ -789,6 +799,372 @@ check_barman_config(void)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* _do_create_recovery_conf()
|
||||
*
|
||||
* Create recovery.conf for a previously cloned instance.
|
||||
*
|
||||
* Prerequisites:
|
||||
*
|
||||
* - data directory must be provided
|
||||
* - the instance should not be running
|
||||
* - an existing "recovery.conf" file can only be overwritten with
|
||||
* -F/--force
|
||||
* - connection parameters for an existing, running node must be provided
|
||||
* - --upstream-node-id, if provided, will be "primary_conninfo",
|
||||
* otherwise primary node id; node must exist; unless -F/--force
|
||||
* provided, must be active and connection possible
|
||||
* - if replication slots in use, create (respect --dry-run)
|
||||
*
|
||||
* not compatible with --no-upstream-connection
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
_do_create_recovery_conf(void)
|
||||
{
|
||||
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
||||
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
|
||||
|
||||
RecordStatus record_status = RECORD_NOT_FOUND;
|
||||
char recovery_file_path[MAXPGPATH] = "";
|
||||
struct stat st;
|
||||
bool node_is_running = false;
|
||||
bool slot_creation_required = false;
|
||||
PGconn *upstream_conn = NULL;
|
||||
PGconn *upstream_repl_conn = NULL;
|
||||
|
||||
get_node_data_directory(local_data_directory);
|
||||
|
||||
if (local_data_directory[0] == '\0')
|
||||
{
|
||||
log_error(_("no data directory provided"));
|
||||
log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do some sanity checks on the data directory to make sure
|
||||
* it contains a valid but dormant instance
|
||||
*/
|
||||
switch (check_dir(local_data_directory))
|
||||
{
|
||||
case DIR_ERROR:
|
||||
log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
break;
|
||||
case DIR_NOENT:
|
||||
log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
break;
|
||||
case DIR_EMPTY:
|
||||
log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
break;
|
||||
case DIR_NOT_EMPTY:
|
||||
/* Present but not empty */
|
||||
if (!is_pg_dir(local_data_directory))
|
||||
{
|
||||
log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (is_pg_running(local_data_directory))
|
||||
{
|
||||
if (runtime_options.force == false)
|
||||
{
|
||||
log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
|
||||
local_data_directory);
|
||||
log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
node_is_running = true;
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_warning(_("\"recovery.conf\" would be created in an active data directory"));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("creating \"recovery.conf\" in an active data directory"));
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* check connection */
|
||||
source_conn = establish_db_connection_by_params(&source_conninfo, true);
|
||||
|
||||
/* determine node for primary_conninfo */
|
||||
|
||||
if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
|
||||
{
|
||||
upstream_node_id = runtime_options.upstream_node_id;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* if --upstream-node-id not specifically supplied, get primary node id */
|
||||
upstream_node_id = get_primary_node_id(source_conn);
|
||||
|
||||
if (upstream_node_id == NODE_NOT_FOUND)
|
||||
{
|
||||
log_error(_("unable to determine primary node for this replication cluster"));
|
||||
PQfinish(source_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_debug("primary node determined as: %i", upstream_node_id);
|
||||
}
|
||||
|
||||
/* attempt to retrieve upstream node record */
|
||||
record_status = get_node_record(source_conn,
|
||||
upstream_node_id,
|
||||
&upstream_node_record);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
|
||||
|
||||
if (record_status == RECORD_ERROR)
|
||||
{
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* attempt to retrieve local node record */
|
||||
record_status = get_node_record(source_conn,
|
||||
config_file_options.node_id,
|
||||
&local_node_record);
|
||||
|
||||
if (record_status != RECORD_FOUND)
|
||||
{
|
||||
log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);
|
||||
|
||||
if (record_status == RECORD_ERROR)
|
||||
{
|
||||
log_detail("%s", PQerrorMessage(source_conn));
|
||||
}
|
||||
|
||||
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
PQfinish(source_conn);
|
||||
|
||||
|
||||
/* connect to upstream (which could be different to source) */
|
||||
|
||||
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
|
||||
if (PQstatus(upstream_conn) != CONNECTION_OK)
|
||||
{
|
||||
log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
/* Set the application name to this node's name */
|
||||
if (config_file_options.node_name[0] != '\0')
|
||||
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
|
||||
|
||||
/* Set the replication user from the primary node record */
|
||||
param_set(&recovery_conninfo, "user", upstream_node_record.repluser);
|
||||
|
||||
initialize_conninfo_params(&recovery_conninfo, false);
|
||||
|
||||
/* We ignore any application_name set in the primary's conninfo */
|
||||
parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);
|
||||
|
||||
/* check that a replication connection can be made (--force = override) */
|
||||
upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);
|
||||
|
||||
if (PQstatus(upstream_repl_conn) != CONNECTION_OK)
|
||||
{
|
||||
if (runtime_options.force == false)
|
||||
{
|
||||
log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
PQfinish(upstream_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
|
||||
/* if replication slots are in use, perform some checks */
|
||||
if (config_file_options.use_replication_slots == true)
|
||||
{
|
||||
PQExpBufferData msg;
|
||||
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
|
||||
|
||||
record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);
|
||||
|
||||
/* check if replication slot exists*/
|
||||
if (record_status == RECORD_FOUND)
|
||||
{
|
||||
if (slot_info.active == true)
|
||||
{
|
||||
initPQExpBuffer(&msg);
|
||||
|
||||
appendPQExpBuffer(&msg,
|
||||
_("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
|
||||
local_node_record.slot_name,
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
if (runtime_options.force == false && runtime_options.dry_run == false)
|
||||
{
|
||||
log_error("%s", msg.data);
|
||||
log_hint(_("use -F/--force to continue anyway"));
|
||||
termPQExpBuffer(&msg);
|
||||
PQfinish(upstream_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_warning("%s", msg.data);
|
||||
termPQExpBuffer(&msg);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(_("an inactive replication slot for this node exists on the upstream node"));
|
||||
}
|
||||
}
|
||||
/* if not, if check one can and should be created */
|
||||
else
|
||||
{
|
||||
get_node_replication_stats(upstream_conn, UNKNOWN_SERVER_VERSION_NUM, &upstream_node_record);
|
||||
|
||||
if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
|
||||
{
|
||||
slot_creation_required = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
initPQExpBuffer(&msg);
|
||||
|
||||
appendPQExpBuffer(&msg,
|
||||
_("insufficient free replicaiton slots on upstream node \"%s\" (ID: %i)"),
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
|
||||
if (runtime_options.force == false && runtime_options.dry_run == false)
|
||||
{
|
||||
log_error("%s", msg.data);
|
||||
log_hint(_("use -F/--force to continue anyway"));
|
||||
termPQExpBuffer(&msg);
|
||||
PQfinish(upstream_conn);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
log_warning("%s", msg.data);
|
||||
termPQExpBuffer(&msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check if recovery.conf exists */
|
||||
|
||||
maxpath_snprintf(recovery_file_path, "%s/%s", local_data_directory, RECOVERY_COMMAND_FILE);
|
||||
|
||||
if (stat(recovery_file_path, &st) == -1)
|
||||
{
|
||||
if (errno != ENOENT)
|
||||
{
|
||||
log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
|
||||
local_data_directory);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (runtime_options.force == false)
|
||||
{
|
||||
log_error(_("\"recovery.conf\" already exists in \"%s\""),
|
||||
local_data_directory);
|
||||
log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
|
||||
}
|
||||
}
|
||||
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("would create \"recovery.conf\" file"));
|
||||
log_detail(_("data directory is: \"%s\""), local_data_directory);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!create_recovery_file(&upstream_node_record, &recovery_conninfo, local_data_directory))
|
||||
{
|
||||
log_error(_("unable to create \"recovery.conf\""));
|
||||
}
|
||||
else
|
||||
{
|
||||
log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
|
||||
|
||||
if (node_is_running == true)
|
||||
{
|
||||
log_hint(_("node must be restarted for the new file to take effect"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* add replication slot, if required */
|
||||
if (slot_creation_required == true)
|
||||
{
|
||||
if (runtime_options.dry_run == true)
|
||||
{
|
||||
log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
|
||||
local_node_record.slot_name,
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
PQExpBufferData msg;
|
||||
initPQExpBuffer(&msg);
|
||||
|
||||
if (create_replication_slot(upstream_conn,
|
||||
local_node_record.slot_name,
|
||||
UNKNOWN_SERVER_VERSION_NUM,
|
||||
&msg) == false)
|
||||
{
|
||||
log_error("%s", msg.data);
|
||||
PQfinish(upstream_conn);
|
||||
termPQExpBuffer(&msg);
|
||||
exit(ERR_BAD_CONFIG);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&msg);
|
||||
|
||||
log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
|
||||
local_node_record.slot_name,
|
||||
upstream_node_record.node_name,
|
||||
upstream_node_id);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
PQfinish(upstream_conn);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* do_standby_register()
|
||||
*
|
||||
@@ -4796,7 +5172,7 @@ run_file_backup(t_node_info *node_record)
|
||||
if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
|
||||
|
||||
log_detail("%s", strerror(errno));
|
||||
r = ERR_BAD_BASEBACKUP;
|
||||
goto stop_backup;
|
||||
}
|
||||
@@ -4836,9 +5212,9 @@ run_file_backup(t_node_info *node_record)
|
||||
*/
|
||||
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
|
||||
{
|
||||
log_error(_("unable to remove tablespace_map file %s: %s"),
|
||||
tablespace_map_filename.data,
|
||||
strerror(errno));
|
||||
log_error(_("unable to remove tablespace_map file \"%s\""),
|
||||
tablespace_map_filename.data);
|
||||
log_detail("%s", strerror(errno));
|
||||
|
||||
r = ERR_BAD_BASEBACKUP;
|
||||
goto stop_backup;
|
||||
@@ -5672,6 +6048,8 @@ do_standby_help(void)
|
||||
" when the intended upstream server does not yet exist\n"));
|
||||
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
|
||||
printf(_(" --without-barman do not use Barman even if configured\n"));
|
||||
printf(_(" --recovery-conf-only create \"recovery.conf\" file for a previously cloned instance\n"));
|
||||
|
||||
puts("");
|
||||
|
||||
printf(_("STANDBY REGISTER\n"));
|
||||
|
||||
@@ -80,6 +80,7 @@ typedef struct
|
||||
char replication_user[MAXLEN];
|
||||
char upstream_conninfo[MAXLEN];
|
||||
bool without_barman;
|
||||
bool recovery_conf_only;
|
||||
|
||||
/* "standby clone"/"standby follow" options */
|
||||
int upstream_node_id;
|
||||
@@ -145,7 +146,7 @@ typedef struct
|
||||
UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
|
||||
/* "standby clone" options */ \
|
||||
false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
|
||||
false, \
|
||||
false, false, \
|
||||
/* "standby clone"/"standby follow" options */ \
|
||||
NO_UPSTREAM_NODE, \
|
||||
/* "standby register" options */ \
|
||||
@@ -164,7 +165,7 @@ typedef struct
|
||||
false, "", CLUSTER_EVENT_LIMIT, \
|
||||
/* "cluster cleanup" options */ \
|
||||
0, \
|
||||
/* Following options for internal use */ \
|
||||
/* following options for internal use */ \
|
||||
"/tmp", OM_TEXT \
|
||||
}
|
||||
|
||||
|
||||
@@ -388,6 +388,11 @@ main(int argc, char **argv)
|
||||
runtime_options.without_barman = true;
|
||||
break;
|
||||
|
||||
case OPT_RECOVERY_CONF_ONLY:
|
||||
runtime_options.recovery_conf_only = true;
|
||||
break;
|
||||
|
||||
|
||||
/*---------------------------
|
||||
* "standby register" options
|
||||
*---------------------------
|
||||
@@ -1014,10 +1019,7 @@ main(int argc, char **argv)
|
||||
/*
|
||||
* Check for configuration file items which can be overridden by runtime
|
||||
* options
|
||||
*/
|
||||
|
||||
/*
|
||||
* ============================================================================
|
||||
* =====================================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -1495,19 +1497,6 @@ check_cli_parameters(const int action)
|
||||
}
|
||||
}
|
||||
|
||||
if (runtime_options.event[0])
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case CLUSTER_EVENT:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--event not required when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
}
|
||||
|
||||
if (runtime_options.replication_user[0])
|
||||
{
|
||||
switch (action)
|
||||
@@ -1527,6 +1516,32 @@ check_cli_parameters(const int action)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * --recovery-conf-only is only meaningful for "standby clone"; for any
 * other action, queue a warning that it will be ignored. The message
 * names the option as it is actually spelled on the command line.
 */
if (runtime_options.recovery_conf_only == true)
{
	switch (action)
	{
		case STANDBY_CLONE:
			break;
		default:
			item_list_append_format(&cli_warnings,
									_("--recovery-conf-only will be ignored when executing %s"),
									action_name(action));
	}
}
|
||||
|
||||
if (runtime_options.event[0])
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case CLUSTER_EVENT:
|
||||
break;
|
||||
default:
|
||||
item_list_append_format(&cli_warnings,
|
||||
_("--event not required when executing %s"),
|
||||
action_name(action));
|
||||
}
|
||||
}
|
||||
|
||||
if (runtime_options.limit_provided)
|
||||
{
|
||||
switch (action)
|
||||
|
||||
@@ -85,6 +85,7 @@
|
||||
#define OPT_WAIT_START 1036
|
||||
#define OPT_REPL_CONN 1037
|
||||
#define OPT_REMOTE_NODE_ID 1038
|
||||
#define OPT_RECOVERY_CONF_ONLY 1039
|
||||
|
||||
/* deprecated since 3.3 */
|
||||
#define OPT_DATA_DIR 999
|
||||
@@ -139,6 +140,7 @@ static struct option long_options[] =
|
||||
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
|
||||
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
|
||||
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
|
||||
{"recovery-conf-only", no_argument, NULL, OPT_RECOVERY_CONF_ONLY},
|
||||
|
||||
/* "standby register" options */
|
||||
{"wait-start", required_argument, NULL, OPT_WAIT_START},
|
||||
|
||||
Reference in New Issue
Block a user