diff --git a/repmgr-action-node.c b/repmgr-action-node.c index c640c999..5e399f4a 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -49,7 +49,7 @@ static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); - +static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); /* * NODE STATUS * @@ -803,6 +803,16 @@ do_node_check(void) exit(return_code); } + if (runtime_options.replication_config_owner == true) + { + return_code = do_node_check_replication_config_owner(conn, + runtime_options.output_mode, + &node_info, + NULL); + PQfinish(conn); + exit(return_code); + } + if (runtime_options.output_mode == OM_NAGIOS) { @@ -1799,11 +1809,11 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in } initPQExpBuffer(&details); + /* * Check actual data directory matches that in repmgr.conf; note this requires * a superuser connection */ - if (connection_has_pg_settings(conn) == true) { /* we expect to have a database connection */ @@ -1913,6 +1923,40 @@ do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_in return status; } +/* + * This is not included in the general list output + */ +static +CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output) +{ + CheckStatus status = CHECK_STATUS_OK; + + PQExpBufferData errmsg; + PQExpBufferData details; + + if (mode != OM_OPTFORMAT) + { + log_error(_("--replication-config-owner option can only be used with --optformat")); + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + initPQExpBuffer(&errmsg); + initPQExpBuffer(&details); + + if (check_replication_config_owner(PQserverVersion(conn), + config_file_options.data_directory, + &errmsg, &details) == false) + { + status = CHECK_STATUS_CRITICAL; + } + + printf("--replication-config-owner=%s\n", + output_check_status(status)); + + return status; +} + void do_node_service(void) diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index ae954c14..de772a31 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -133,6 +133,8 @@ static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_ static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold); static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output); static bool parse_data_directory_config(const char *node_check_output); +static bool parse_replication_config_owner(const char *node_check_output); + /* * STANDBY CLONE @@ -3434,6 +3436,8 @@ do_standby_switchover(void) PQExpBufferData remote_command_str; PQExpBufferData command_output; PQExpBufferData node_rejoin_options; + PQExpBufferData errmsg; + PQExpBufferData detailmsg; int r, i; @@ -3526,6 +3530,39 @@ do_standby_switchover(void) exit(ERR_SWITCHOVER_FAIL); } + /* + * Check that the local replication configuration file is owned by the data + * directory owner. + * + * For PostgreSQL 11 and earlier, if PostgreSQL is not able rename "recovery.conf", + * promotion will fail. + * + * For PostgreSQL 12 and later, promotion will not fail even if "postgresql.auto.conf" + * is owned by another user, but we'll check just in case, as it is indicative of a + * poorly configured setup. In any case we will need to check "postgresql.auto.conf" on + * the demotion candidate as the rejoin will fail if we are unable to to write to that. + */ + + initPQExpBuffer(&errmsg); + initPQExpBuffer(&detailmsg); + + if (check_replication_config_owner(PQserverVersion(local_conn), + config_file_options.data_directory, + &errmsg, &detailmsg) == false) + { + log_error("%s", errmsg.data); + log_detail("%s", detailmsg.data); + + termPQExpBuffer(&errmsg); + termPQExpBuffer(&detailmsg); + + PQfinish(local_conn); + exit(ERR_BAD_CONFIG); + } + + termPQExpBuffer(&errmsg); + termPQExpBuffer(&detailmsg); + /* check remote server connection and retrieve its record */ remote_conn = get_primary_connection(local_conn, &remote_node_id, remote_conninfo); @@ -3897,6 +3934,63 @@ do_standby_switchover(void) remote_host); } + /* + * For PostgreSQL 12 and later, check "postgresql.auto.conf" is owned by the + * correct user, otherwise the node will probably not be able to attach to + * the promotion candidate (and is a sign of bad configuration anyway) so we + * will complain vocally. + */ + + if (PQserverVersion(local_conn) >= 120000) + { + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str, &remote_node_record); + + appendPQExpBufferStr(&remote_command_str, "node check --replication-config-owner --optformat -LINFO 2>/dev/null"); + + initPQExpBuffer(&command_output); + command_success = remote_command(remote_host, + runtime_options.remote_user, + remote_command_str.data, + config_file_options.ssh_options, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (command_success == false) + { + log_error(_("unable to execute \"%s node check --replication-config-owner\" on \"%s\":"), + progname(), remote_host); + log_detail("%s", command_output.data); + + PQfinish(remote_conn); + PQfinish(local_conn); + + termPQExpBuffer(&command_output); + + exit(ERR_BAD_CONFIG); + } + + if (parse_replication_config_owner(command_output.data) == false) + { + log_error(_("\"%s\" file on \"%s\" has incorrect ownership"), + PG_AUTOCONF_FILENAME, + remote_node_record.node_name); + + log_hint(_("check the file has the same owner/group as the data directory")); + + PQfinish(remote_conn); + PQfinish(local_conn); + + termPQExpBuffer(&command_output); + + exit(ERR_BAD_CONFIG); + } + + termPQExpBuffer(&command_output); + } + + /* * populate local node record with current state of various replication-related * values, so we can check for sufficient walsenders and replication slots @@ -8145,6 +8239,59 @@ parse_data_directory_config(const char *node_check_output) } +static bool +parse_replication_config_owner(const char *node_check_output) +{ + bool config_ok = true; + + int c = 0, + argc_item = 0; + char **argv_array = NULL; + int optindex = 0; + + /* We're only interested in this option */ + struct option node_check_options[] = + { + {"replication-config-owner", required_argument, NULL, 'C'}, + {NULL, 0, NULL, 0} + }; + + /* Don't attempt to tokenise an empty string */ + if (!strlen(node_check_output)) + { + return false; + } + + argc_item = parse_output_to_argv(node_check_output, &argv_array); + + /* Reset getopt's optind variable */ + optind = 0; + + /* Prevent getopt from emitting errors */ + opterr = 0; + + while ((c = getopt_long(argc_item, argv_array, "C:", node_check_options, + &optindex)) != -1) + { + switch (c) + { + /* --configured-data-directory */ + case 'C': + { + /* we only care whether it's "OK" or not */ + if (strncmp(optarg, "OK", 2) != 0) + config_ok = false; + } + break; + } + } + + free_parsed_argv(&argv_array); + + return config_ok; +} + + void do_standby_help(void) { diff --git a/repmgr-client-global.h b/repmgr-client-global.h index b6f2fc88..a10e7da2 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -118,6 +118,7 @@ typedef struct bool has_passfile; bool replication_connection; bool data_directory_config; + bool replication_config_owner; /* "node rejoin" options */ char config_files[MAXLEN]; @@ -170,7 +171,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, false, false, false, false, false, false, false, \ + false, false, false, false, false, false, false, false, false, false, \ /* "node rejoin" options */ \ "", \ /* "node service" options */ \ @@ -281,6 +282,7 @@ extern bool drop_replication_slot_if_exists(PGconn *conn, int node_id, char *slo extern standy_join_status check_standby_join(PGconn *primary_conn, t_node_info *primary_node_record, t_node_info *standby_node_record); extern bool check_replication_slots_available(int node_id, PGconn* conn); extern bool check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *follow_target_conn, t_node_info *follow_target_node_record, bool is_rejoin); +extern bool check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg); extern void check_shared_library(PGconn *conn); extern bool is_repmgrd_running(PGconn *conn); diff --git a/repmgr-client.c b/repmgr-client.c index 17ea8c4a..470ba165 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -533,6 +533,10 @@ main(int argc, char **argv) runtime_options.data_directory_config = true; break; + case OPT_REPLICATION_CONFIG_OWNER: + runtime_options.replication_config_owner = true; + break; + /*-------------------- * "node rejoin" options *-------------------- @@ -4205,6 +4209,91 @@ check_node_can_attach(TimeLineID local_tli, XLogRecPtr local_xlogpos, PGconn *fo } +/* + * Check that the replication configuration file is owned by the user who + * owns the data directory. + */ +extern bool +check_replication_config_owner(int pg_version, const char *data_directory, PQExpBufferData *error_msg, PQExpBufferData *detail_msg) +{ + PQExpBufferData replication_config_file; + struct stat dirstat; + struct stat confstat; + + if (stat(data_directory, &dirstat)) + { + if (error_msg != NULL) + { + appendPQExpBuffer(error_msg, + "unable to check ownership of data directory \"%s\"", + data_directory); + appendPQExpBufferStr(detail_msg, + strerror(errno)); + } + return false; + } + + initPQExpBuffer(&replication_config_file); + + appendPQExpBuffer(&replication_config_file, + "%s/%s", + config_file_options.data_directory, + pg_version >= 120000 ? PG_AUTOCONF_FILENAME : RECOVERY_COMMAND_FILE); + + stat(replication_config_file.data, &confstat); + + if (confstat.st_uid == dirstat.st_uid) + { + termPQExpBuffer(&replication_config_file); + return true; + } + + if (error_msg != NULL) + { + char conf_owner[MAXLEN]; + char dir_owner[MAXLEN]; + struct passwd *pw; + + pw = getpwuid(confstat.st_uid); + if (!pw) + { + maxlen_snprintf(conf_owner, + "(unknown user %i)", + confstat.st_uid); + } + else + { + strncpy(conf_owner, pw->pw_name, MAXLEN); + } + + pw = getpwuid(dirstat.st_uid); + + if (!pw) + { + maxlen_snprintf(conf_owner, + "(unknown user %i)", + dirstat.st_uid); + } + else + { + strncpy(dir_owner, pw->pw_name, MAXLEN); + } + + appendPQExpBuffer(error_msg, + "ownership error for file \"%s\"", + replication_config_file.data); + appendPQExpBuffer(detail_msg, + "file owner is \"%s\", data directory owner is \"%s\"", + conf_owner, + dir_owner); + } + + termPQExpBuffer(&replication_config_file); + + return false; +} + + /* * Simple check to see if "shared_preload_libraries" includes "repmgr". * Parsing "shared_preload_libraries" is non-trivial, as it's potentially diff --git a/repmgr-client.h b/repmgr-client.h index 95abb55a..8131e0c1 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -98,6 +98,7 @@ #define OPT_ENABLE_WAL_RECEIVER 1045 #define OPT_DETAIL 1046 #define OPT_REPMGRD_FORCE_UNPAUSE 1047 +#define OPT_REPLICATION_CONFIG_OWNER 1048 /* deprecated since 4.0 */ #define OPT_CHECK_UPSTREAM_CONFIG 999 @@ -185,6 +186,7 @@ static struct option long_options[] = {"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE}, {"replication-connection", no_argument, NULL, OPT_REPL_CONN}, {"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG}, + {"replication-config-owner", no_argument, NULL, OPT_REPLICATION_CONFIG_OWNER}, /* "node rejoin" options */ {"config-files", required_argument, NULL, OPT_CONFIG_FILES},