standby switchover: check replication configuration file ownership

Within a PostgreSQL data directory, all files should have the same
ownership as the data directory itself. PostgreSQL itself expects
this, and ownership of files by another user is likely to cause
problems.

In PostgreSQL 11 and earlier, if "recovery.conf" cannot be moved
by PostgreSQL (e.g. because it is owned by root), it will not be
possible to promote the standby to primary.

In PostgreSQL 12 and later, if "postgresql.auto.conf" on the demotion
candidate (current primary) has incorrect ownership (e.g. owned by
root), repmgr will very likely not be able to modify this file and
write the replication configuration required for the node to rejoin
the cluster as a standby.

This commit adds checks to catch both cases before a switchover is executed.
This commit is contained in:
Ian Barwick
2020-03-04 11:35:52 +09:00
parent 194b6d0948
commit 8f6058c676
5 changed files with 287 additions and 3 deletions

View File

@@ -49,7 +49,7 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
/* not part of the general list output; only usable with --optformat */
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
/*
* NODE STATUS
*
@@ -803,6 +803,16 @@ do_node_check(void)
exit(return_code);
}
if (runtime_options.replication_config_owner == true)
{
return_code = do_node_check_replication_config_owner(conn,
runtime_options.output_mode,
&node_info,
NULL);
PQfinish(conn);
exit(return_code);
}
if (runtime_options.output_mode == OM_NAGIOS)
{
@@ -1799,11 +1809,11 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
}
initPQExpBuffer(&details);
/*
* Check actual data directory matches that in repmgr.conf; note this requires
* a superuser connection
*/
if (connection_has_pg_settings(conn) == true)
{
/* we expect to have a database connection */
@@ -1913,6 +1923,40 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in
return status;
}
/*
 * do_node_check_replication_config_owner()
 *
 * Run check_replication_config_owner() against the configured data
 * directory and print the result to stdout as
 * "--replication-config-owner=<status>".
 *
 * This is not included in the general list output and can only be used
 * with --optformat; for any other output mode an error is logged, the
 * connection is closed and the process exits with ERR_BAD_CONFIG.
 *
 * "node_info" and "list_output" are accepted for signature parity with the
 * other do_node_check_*() functions but are not used here.
 *
 * Returns CHECK_STATUS_OK if the check passes, CHECK_STATUS_CRITICAL
 * otherwise.
 */
static CheckStatus
do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;

	PQExpBufferData errmsg;
	PQExpBufferData details;

	if (mode != OM_OPTFORMAT)
	{
		log_error(_("--replication-config-owner option can only be used with --optformat"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&errmsg);
	initPQExpBuffer(&details);

	if (check_replication_config_owner(PQserverVersion(conn),
									   config_file_options.data_directory,
									   &errmsg, &details) == false)
	{
		status = CHECK_STATUS_CRITICAL;
	}

	printf("--replication-config-owner=%s\n",
		   output_check_status(status));

	/*
	 * NOTE(review): the buffers presumably receive diagnostic text from
	 * check_replication_config_owner(); it is not emitted in this output
	 * mode, but the buffers must still be released to avoid leaking them.
	 */
	termPQExpBuffer(&errmsg);
	termPQExpBuffer(&details);

	return status;
}
void
do_node_service(void)