"standby clone": add --recovery-conf-only option

This will generate "recovery.conf" for an existing standby.

Typical use-case is a standby cloned manually from an external data
source (e.g. Barman), where "recovery.conf" needs to be created
(and if required a replication slot).

The --dry-run option will check the pre-requisites but not actually
create "recovery.conf" or a replication slot.

This requires that the upstream node is running, a replication connection
can be made and if required a replication slot can be created.

Implements GitHub #382.
This commit is contained in:
Ian Barwick
2018-02-21 14:37:01 +09:00
parent 22b3a74fa0
commit ee98a3a58e
5 changed files with 424 additions and 25 deletions

View File

@@ -3486,6 +3486,9 @@ create_replication_slot(PGconn *conn, char *slot_name, int server_version_num, P
PGresult *res = NULL;
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
server_version_num = get_server_version(conn, NULL);
/*
* Check whether slot exists already; if it exists and is active, that
* means another active standby is using it, which creates an error

View File

@@ -73,6 +73,7 @@ static char datadir_list_filename[MAXLEN];
static char barman_command_buf[MAXLEN] = "";
static void _do_standby_promote_internal(PGconn *conn, const char *data_dir);
static void _do_create_recovery_conf(void);
static void check_barman_config(void);
static void check_source_server(void);
@@ -119,6 +120,7 @@ static ConnectionStatus parse_remote_node_replication_connection(const char *nod
* --recovery-min-apply-delay
* --replication-user (only required if no upstream record)
* --without-barman
* --recovery-conf-only
*/
void
@@ -130,6 +132,14 @@ do_standby_clone(void)
/* dummy node record */
t_node_info node_record = T_NODE_INFO_INITIALIZER;
/*
* --recovery-conf-only provided - we'll handle that separately
*/
if (runtime_options.recovery_conf_only == true)
{
return _do_create_recovery_conf();
}
/*
* conninfo params for the actual upstream node (which might be different
* to the node we're cloning from) to write to recovery.conf
@@ -789,6 +799,372 @@ check_barman_config(void)
}
/*
* _do_create_recovery_conf()
*
* Create recovery.conf for a previously cloned instance.
*
* Prerequisites:
*
* - data directory must be provided
* - the instance should not be running
* - an existing "recovery.conf" file can only be overwritten with
* -F/--force
* - connection parameters for an existing, running node must be provided
* - --upstream-node-id, if provided, will be "primary_conninfo",
* otherwise primary node id; node must exist; unless -F/--force
* provided, must be active and connection possible
* - if replication slots in use, create (respect --dry-run)
*
* not compatible with --no-upstream-connection
*
*/
static void
_do_create_recovery_conf(void)
{
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
t_node_info upstream_node_record = T_NODE_INFO_INITIALIZER;
RecordStatus record_status = RECORD_NOT_FOUND;
char recovery_file_path[MAXPGPATH] = "";
struct stat st;
bool node_is_running = false;
bool slot_creation_required = false;
PGconn *upstream_conn = NULL;
PGconn *upstream_repl_conn = NULL;
get_node_data_directory(local_data_directory);
if (local_data_directory[0] == '\0')
{
log_error(_("no data directory provided"));
log_hint(_("provide the node's \"repmgr.conf\" file with -f/--config-file or the data directory with -D/--pgdata"));
exit(ERR_BAD_CONFIG);
}
/*
* Do some sanity checks on the data directory to make sure
* it contains a valid but dormant instance
*/
switch (check_dir(local_data_directory))
{
case DIR_ERROR:
log_error(_("unable to access specified data directory \"%s\""), local_data_directory);
log_detail("%s", strerror(errno));
exit(ERR_BAD_CONFIG);
break;
case DIR_NOENT:
log_error(_("specified data directory \"%s\" does not exist"), local_data_directory);
exit(ERR_BAD_CONFIG);
break;
case DIR_EMPTY:
log_error(_("specified data directory \"%s\" is empty"), local_data_directory);
exit(ERR_BAD_CONFIG);
break;
case DIR_NOT_EMPTY:
/* Present but not empty */
if (!is_pg_dir(local_data_directory))
{
log_error(_("specified data directory \"%s\" does not contain a PostgreSQL instance"), local_data_directory);
exit(ERR_BAD_CONFIG);
}
if (is_pg_running(local_data_directory))
{
if (runtime_options.force == false)
{
log_error(_("specified data directory \"%s\" appears to contain a running PostgreSQL instance"),
local_data_directory);
log_hint(_("use -F/--force to create \"recovery.conf\" anyway"));
exit(ERR_BAD_CONFIG);
}
node_is_running = true;
if (runtime_options.dry_run == true)
{
log_warning(_("\"recovery.conf\" would be created in an active data directory"));
}
else
{
log_warning(_("creating \"recovery.conf\" in an active data directory"));
}
}
break;
default:
break;
}
/* check connection */
source_conn = establish_db_connection_by_params(&source_conninfo, true);
/* determine node for primary_conninfo */
if (runtime_options.upstream_node_id != UNKNOWN_NODE_ID)
{
upstream_node_id = runtime_options.upstream_node_id;
}
else
{
/* if --upstream-node-id not specifically supplied, get primary node id */
upstream_node_id = get_primary_node_id(source_conn);
if (upstream_node_id == NODE_NOT_FOUND)
{
log_error(_("unable to determine primary node for this replication cluster"));
PQfinish(source_conn);
exit(ERR_BAD_CONFIG);
}
log_debug("primary node determined as: %i", upstream_node_id);
}
/* attempt to retrieve upstream node record */
record_status = get_node_record(source_conn,
upstream_node_id,
&upstream_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for upstream node %i"), upstream_node_id);
if (record_status == RECORD_ERROR)
{
log_detail("%s", PQerrorMessage(source_conn));
}
exit(ERR_BAD_CONFIG);
}
/* attempt to retrieve local node record */
record_status = get_node_record(source_conn,
config_file_options.node_id,
&local_node_record);
if (record_status != RECORD_FOUND)
{
log_error(_("unable to retrieve node record for local node %i"), config_file_options.node_id);
if (record_status == RECORD_ERROR)
{
log_detail("%s", PQerrorMessage(source_conn));
}
exit(ERR_BAD_CONFIG);
}
PQfinish(source_conn);
/* connect to upstream (which could be different to source) */
upstream_conn = establish_db_connection(upstream_node_record.conninfo, false);
if (PQstatus(upstream_conn) != CONNECTION_OK)
{
log_error(_("unable to connect to upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_id);
exit(ERR_BAD_CONFIG);
}
/* Set the application name to this node's name */
if (config_file_options.node_name[0] != '\0')
param_set(&recovery_conninfo, "application_name", config_file_options.node_name);
/* Set the replication user from the primary node record */
param_set(&recovery_conninfo, "user", upstream_node_record.repluser);
initialize_conninfo_params(&recovery_conninfo, false);
/* We ignore any application_name set in the primary's conninfo */
parse_conninfo_string(upstream_node_record.conninfo, &recovery_conninfo, NULL, true);
/* check that a replication connection can be made (--force = override) */
upstream_repl_conn = establish_db_connection_by_params(&recovery_conninfo, false);
if (PQstatus(upstream_repl_conn) != CONNECTION_OK)
{
if (runtime_options.force == false)
{
log_error(_("unable to initiate replication connection to upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_id);
PQfinish(upstream_conn);
exit(ERR_BAD_CONFIG);
}
}
/* if replication slots are in use, perform some checks */
if (config_file_options.use_replication_slots == true)
{
PQExpBufferData msg;
t_replication_slot slot_info = T_REPLICATION_SLOT_INITIALIZER;
record_status = get_slot_record(upstream_conn, local_node_record.slot_name, &slot_info);
/* check if replication slot exists*/
if (record_status == RECORD_FOUND)
{
if (slot_info.active == true)
{
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("an active replication slot named \"%s\" already exists on upstream node \"%s\" (ID: %i)"),
local_node_record.slot_name,
upstream_node_record.node_name,
upstream_node_id);
if (runtime_options.force == false && runtime_options.dry_run == false)
{
log_error("%s", msg.data);
log_hint(_("use -F/--force to continue anyway"));
termPQExpBuffer(&msg);
PQfinish(upstream_conn);
exit(ERR_BAD_CONFIG);
}
log_warning("%s", msg.data);
termPQExpBuffer(&msg);
}
else
{
log_info(_("an inactive replication slot for this node exists on the upstream node"));
}
}
/* if not, if check one can and should be created */
else
{
get_node_replication_stats(upstream_conn, UNKNOWN_SERVER_VERSION_NUM, &upstream_node_record);
if (upstream_node_record.max_replication_slots > upstream_node_record.total_replication_slots)
{
slot_creation_required = true;
}
else
{
initPQExpBuffer(&msg);
appendPQExpBuffer(&msg,
_("insufficient free replicaiton slots on upstream node \"%s\" (ID: %i)"),
upstream_node_record.node_name,
upstream_node_id);
if (runtime_options.force == false && runtime_options.dry_run == false)
{
log_error("%s", msg.data);
log_hint(_("use -F/--force to continue anyway"));
termPQExpBuffer(&msg);
PQfinish(upstream_conn);
exit(ERR_BAD_CONFIG);
}
log_warning("%s", msg.data);
termPQExpBuffer(&msg);
}
}
}
/* check if recovery.conf exists */
maxpath_snprintf(recovery_file_path, "%s/%s", local_data_directory, RECOVERY_COMMAND_FILE);
if (stat(recovery_file_path, &st) == -1)
{
if (errno != ENOENT)
{
log_error(_("unable to check for existing \"recovery.conf\" file in \"%s\""),
local_data_directory);
log_detail("%s", strerror(errno));
exit(ERR_BAD_CONFIG);
}
}
else
{
if (runtime_options.force == false)
{
log_error(_("\"recovery.conf\" already exists in \"%s\""),
local_data_directory);
log_hint(_("use -F/--force to overwrite an existing \"recovery.conf\" file"));
exit(ERR_BAD_CONFIG);
}
if (runtime_options.dry_run == true)
{
log_warning(_("the existing \"recovery.conf\" file would be overwritten"));
}
else
{
log_warning(_("the existing \"recovery.conf\" file will be overwritten"));
}
}
if (runtime_options.dry_run == true)
{
log_info(_("would create \"recovery.conf\" file"));
log_detail(_("data directory is: \"%s\""), local_data_directory);
}
else
{
if (!create_recovery_file(&upstream_node_record, &recovery_conninfo, local_data_directory))
{
log_error(_("unable to create \"recovery.conf\""));
}
else
{
log_notice(_("\"recovery.conf\" created as \"%s\""), recovery_file_path);
if (node_is_running == true)
{
log_hint(_("node must be restarted for the new file to take effect"));
}
}
}
/* add replication slot, if required */
if (slot_creation_required == true)
{
if (runtime_options.dry_run == true)
{
log_info(_("would create replication slot \"%s\" on upstream node \"%s\" (ID: %i)"),
local_node_record.slot_name,
upstream_node_record.node_name,
upstream_node_id);
}
else
{
PQExpBufferData msg;
initPQExpBuffer(&msg);
if (create_replication_slot(upstream_conn,
local_node_record.slot_name,
UNKNOWN_SERVER_VERSION_NUM,
&msg) == false)
{
log_error("%s", msg.data);
PQfinish(upstream_conn);
termPQExpBuffer(&msg);
exit(ERR_BAD_CONFIG);
}
termPQExpBuffer(&msg);
log_notice(_("replication slot \"%s\" created on upstream node \"%s\" (ID: %i)"),
local_node_record.slot_name,
upstream_node_record.node_name,
upstream_node_id);
}
}
PQfinish(upstream_conn);
return;
}
/*
* do_standby_register()
*
@@ -4796,7 +5172,7 @@ run_file_backup(t_node_info *node_record)
if (unlink(tblspc_symlink.data) < 0 && errno != ENOENT)
{
log_error(_("unable to remove tablespace symlink %s"), tblspc_symlink.data);
log_detail("%s", strerror(errno));
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
}
@@ -4836,9 +5212,9 @@ run_file_backup(t_node_info *node_record)
*/
if (unlink(tablespace_map_filename.data) < 0 && errno != ENOENT)
{
log_error(_("unable to remove tablespace_map file %s: %s"),
tablespace_map_filename.data,
strerror(errno));
log_error(_("unable to remove tablespace_map file \"%s\""),
tablespace_map_filename.data);
log_detail("%s", strerror(errno));
r = ERR_BAD_BASEBACKUP;
goto stop_backup;
@@ -5672,6 +6048,8 @@ do_standby_help(void)
" when the intended upstream server does not yet exist\n"));
printf(_(" --upstream-node-id ID of the upstream node to replicate from (optional, defaults to primary node)\n"));
printf(_(" --without-barman do not use Barman even if configured\n"));
printf(_(" --recovery-conf-only create \"recovery.conf\" file for a previously cloned instance\n"));
puts("");
printf(_("STANDBY REGISTER\n"));

View File

@@ -80,6 +80,7 @@ typedef struct
char replication_user[MAXLEN];
char upstream_conninfo[MAXLEN];
bool without_barman;
bool recovery_conf_only;
/* "standby clone"/"standby follow" options */
int upstream_node_id;
@@ -138,14 +139,14 @@ typedef struct
/* output options */ \
false, false, false, \
/* database connection options */ \
"", "", "", "", \
"", "", "", "", \
/* other connection options */ \
"", "", \
"", "", \
/* general node options */ \
UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \
/* "standby clone" options */ \
false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \
false, \
false, false, \
/* "standby clone"/"standby follow" options */ \
NO_UPSTREAM_NODE, \
/* "standby register" options */ \
@@ -164,7 +165,7 @@ typedef struct
false, "", CLUSTER_EVENT_LIMIT, \
/* "cluster cleanup" options */ \
0, \
/* Following options for internal use */ \
/* following options for internal use */ \
"/tmp", OM_TEXT \
}

View File

@@ -388,6 +388,11 @@ main(int argc, char **argv)
runtime_options.without_barman = true;
break;
case OPT_RECOVERY_CONF_ONLY:
runtime_options.recovery_conf_only = true;
break;
/*---------------------------
* "standby register" options
*---------------------------
@@ -1014,10 +1019,7 @@ main(int argc, char **argv)
/*
* Check for configuration file items which can be overriden by runtime
* options
*/
/*
* ============================================================================
* =====================================================================
*/
/*
@@ -1495,19 +1497,6 @@ check_cli_parameters(const int action)
}
}
if (runtime_options.event[0])
{
switch (action)
{
case CLUSTER_EVENT:
break;
default:
item_list_append_format(&cli_warnings,
_("--event not required when executing %s"),
action_name(action));
}
}
if (runtime_options.replication_user[0])
{
switch (action)
@@ -1527,6 +1516,32 @@ check_cli_parameters(const int action)
}
}
if (runtime_options.recovery_conf_only == true)
{
switch (action)
{
case STANDBY_CLONE:
break;
default:
item_list_append_format(&cli_warnings,
_("--create-recovery-conf will be ignored when executing %s"),
action_name(action));
}
}
if (runtime_options.event[0])
{
switch (action)
{
case CLUSTER_EVENT:
break;
default:
item_list_append_format(&cli_warnings,
_("--event not required when executing %s"),
action_name(action));
}
}
if (runtime_options.limit_provided)
{
switch (action)

View File

@@ -85,6 +85,7 @@
#define OPT_WAIT_START 1036
#define OPT_REPL_CONN 1037
#define OPT_REMOTE_NODE_ID 1038
#define OPT_RECOVERY_CONF_ONLY 1039
/* deprecated since 3.3 */
#define OPT_DATA_DIR 999
@@ -139,6 +140,7 @@ static struct option long_options[] =
{"upstream-conninfo", required_argument, NULL, OPT_UPSTREAM_CONNINFO},
{"upstream-node-id", required_argument, NULL, OPT_UPSTREAM_NODE_ID},
{"without-barman", no_argument, NULL, OPT_WITHOUT_BARMAN},
{"recovery-conf-only", no_argument, NULL, OPT_RECOVERY_CONF_ONLY},
/* "standby register" options */
{"wait-start", required_argument, NULL, OPT_WAIT_START},