From 91941183bcf8d13d1dd1aad170a117c8480f2cc1 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Thu, 31 Aug 2017 17:54:49 +0900 Subject: [PATCH] Use replication user, if set, when checking replication connections --- repmgr-action-standby.c | 373 +++++++++++++++++++++++++++++++++++++++- repmgr-client-global.h | 2 +- repmgr-client.c | 348 ------------------------------------- 3 files changed, 371 insertions(+), 352 deletions(-) diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 9c117b83..d57077b2 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -64,6 +64,7 @@ static void _do_standby_promote_internal(const char *data_dir); static void check_barman_config(void); static void check_source_server(void); static void check_source_server_via_barman(void); +static bool check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error); static void check_primary_standby_version_match(PGconn *conn, PGconn *primary_conn); static void check_recovery_type(PGconn *conn); @@ -2708,8 +2709,6 @@ check_source_server() source_server_version_num = check_server_version(source_conn, "primary", true, NULL); - check_upstream_config(source_conn, source_server_version_num, true); - if (get_cluster_size(source_conn, cluster_size) == false) exit(ERR_DB_QUERY); @@ -2851,6 +2850,8 @@ check_source_server() exit(ERR_BAD_CONFIG); } + check_upstream_config(source_conn, source_server_version_num, &node_record, true); + } @@ -2941,6 +2942,361 @@ check_source_server_via_barman() } +/* + * check_upstream_config() + * + * Perform sanity check on upstream server configuration before starting cloning + * process + * + * TODO: + * - check user is qualified to perform base backup + */ + +static bool +check_upstream_config(PGconn *conn, int server_version_num, t_node_info *node_info, bool exit_on_error) +{ + int i; + bool config_ok = true; + char *wal_error_message = NULL; + t_basebackup_options backup_options = 
T_BASEBACKUP_OPTIONS_INITIALIZER; + bool backup_options_ok = true; + ItemList backup_option_errors = { NULL, NULL }; + bool xlog_stream = true; + standy_clone_mode mode; + + /* + * Detecting the intended cloning mode + */ + mode = get_standby_clone_mode(); + + /* + * Parse `pg_basebackup_options`, if set, to detect whether --xlog-method + * has been set to something other than `stream` (i.e. `fetch`), as + * this will influence some checks + */ + + backup_options_ok = parse_pg_basebackup_options( + config_file_options.pg_basebackup_options, + &backup_options, server_version_num, + &backup_option_errors); + + if (backup_options_ok == false) + { + if (exit_on_error == true) + { + log_error(_("error(s) encountered parsing 'pg_basebackup_options'")); + print_error_list(&backup_option_errors, LOG_ERR); + log_hint(_("'pg_basebackup_options' is: '%s'"), + config_file_options.pg_basebackup_options); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + + if (strlen(backup_options.xlog_method) && strcmp(backup_options.xlog_method, "stream") != 0) + xlog_stream = false; + + /* Check that WAL level is set correctly */ + { + char *levels_pre96[] = { + "hot_standby", + "logical", + NULL, + }; + + /* + * Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases + * for "replica", but current_setting() will of course always return "replica" + */ + char *levels_96plus[] = { + "replica", + "logical", + NULL, + }; + + char **levels; + int j = 0; + + if (server_version_num < 90600) + { + levels = (char **)levels_pre96; + wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'"); + } + else + { + levels = (char **)levels_96plus; + wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'"); + } + + do + { + i = guc_set(conn, "wal_level", "=", levels[j]); + if (i) + { + break; + } + j++; + } while (levels[j] != NULL); + } + + if (i == 0 || i == -1) + { + if (i == 0) + { + log_error("%s", wal_error_message); + } + + 
if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + + if (config_file_options.use_replication_slots) + { + i = guc_set_typed(conn, "max_replication_slots", ">", + "0", "integer"); + if (i == 0 || i == -1) + { + if (i == 0) + { + log_error(_("parameter \"max_replication_slots\" must be set to at least 1 to enable replication slots")); + log_hint(_("\"max_replication_slots\" should be set to at least the number of expected standbys")); + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + } + } + /* + * physical replication slots not available or not requested - check if + * there are any circumstances where `wal_keep_segments` should be set + */ + else if (mode != barman) + { + bool check_wal_keep_segments = false; + + /* + * A non-zero `wal_keep_segments` value will almost certainly be required + * if pg_basebackup is being used with --xlog-method=fetch, + * *and* no restore command has been specified + */ + if (xlog_stream == false + && strcmp(config_file_options.restore_command, "") == 0) + { + check_wal_keep_segments = true; + } + + if (check_wal_keep_segments == true) + { + i = guc_set_typed(conn, "wal_keep_segments", ">", "0", "integer"); + + if (i == 0 || i == -1) + { + if (i == 0) + { + log_error(_("parameter \"wal_keep_segments\" on the upstream server must be be set to a non-zero value")); + log_hint(_("Choose a value sufficiently high enough to retain enough WAL " + "until the standby has been cloned and started.\n " + "Alternatively set up WAL archiving using e.g. 
PgBarman and configure " + "'restore_command' in repmgr.conf to fetch WALs from there.")); + + if (server_version_num >= 90400) + { + log_hint(_("In PostgreSQL 9.4 and later, replication slots can be used, which " + "do not require \"wal_keep_segments\" to be set " + "(set parameter \"use_replication_slots\" in repmgr.conf to enable)\n" + )); + } + } + + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + } + } + + + /* + * If archive_mode is enabled, check that 'archive_command' is non empty + * (however it's not practical to check that it actually represents a valid + * command). + * + * From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always' + * so for ease of backwards compatibility, rather than explicitly check for an + * enabled mode, check that it's not "off". + */ + + if (guc_set(conn, "archive_mode", "!=", "off")) + { + i = guc_set(conn, "archive_command", "!=", ""); + + if (i == 0 || i == -1) + { + if (i == 0) + log_error(_("parameter \"archive_command\" must be set to a valid command")); + + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + } + + + /* + * Check that 'hot_standby' is on. This isn't strictly necessary + * for the primary server, however the assumption is that we'll be + * cloning standbys and thus copying the primary configuration; + * this way the standby will be correctly configured by default. 
+ */ + + i = guc_set(conn, "hot_standby", "=", "on"); + if (i == 0 || i == -1) + { + if (i == 0) + { + log_error(_("parameter 'hot_standby' must be set to 'on'")); + } + + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + + i = guc_set_typed(conn, "max_wal_senders", ">", "0", "integer"); + if (i == 0 || i == -1) + { + if (i == 0) + { + log_error(_("parameter \"max_wal_senders\" must be set to be at least 1")); + log_hint(_("\"max_wal_senders\" should be set to at least the number of expected standbys")); + } + + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + + config_ok = false; + } + + /* + * If using pg_basebackup, ensure sufficient replication connections can be made. + * There's no guarantee they'll still be available by the time pg_basebackup + * is executed, but there's nothing we can do about that. + */ + if (mode == pg_basebackup) + { + + PGconn **connections; + int i; + int min_replication_connections = 1, + possible_replication_connections = 0; + + t_conninfo_param_list repl_conninfo; + + /* Make a copy of the connection parameter arrays, and append "replication" */ + + initialize_conninfo_params(&repl_conninfo, false); + + conn_to_param_list(conn, &repl_conninfo); + + param_set(&repl_conninfo, "replication", "1"); + + if (*runtime_options.replication_user) + { + param_set(&repl_conninfo, "user", runtime_options.replication_user); + } + else if (node_info->repluser[0] != '\0') + { + param_set(&repl_conninfo, "user", node_info->repluser); + } + + /* + * work out how many replication connections are required (1 or 2) + */ + + if (xlog_stream == true) + min_replication_connections += 1; + + log_verbose(LOG_NOTICE, "checking for available walsenders on upstream node (%i required)", + min_replication_connections); + + connections = pg_malloc0(sizeof(PGconn *) * min_replication_connections); + + /* Attempt to create the minimum number of required concurrent connections */ +
for (i = 0; i < min_replication_connections; i++) + { + PGconn *replication_conn; + + replication_conn = establish_db_connection_by_params(&repl_conninfo, false); + + if (PQstatus(replication_conn) == CONNECTION_OK) + { + connections[i] = replication_conn; + possible_replication_connections++; + } + } + + /* Close previously created connections */ + for (i = 0; i < possible_replication_connections; i++) + { + PQfinish(connections[i]); + } + + pfree(connections); + free_conninfo_params(&repl_conninfo); + + if (possible_replication_connections < min_replication_connections) + { + config_ok = false; + + /* + * XXX at this point we could check current_setting('max_wal_senders) - COUNT(*) FROM pg_stat_replication; + * if >= min_replication_connections we could infer possible authentication error. + * + * Alternatively call PQconnectStart() and poll for presence/absence of CONNECTION_AUTH_OK ? + */ + log_error(_("unable to establish necessary replication connections")); + log_hint(_("increase \"max_wal_senders\" by at least %i"), + min_replication_connections - possible_replication_connections); + + if (exit_on_error == true) + { + PQfinish(conn); + exit(ERR_BAD_CONFIG); + } + } + + log_verbose(LOG_INFO, "sufficient walsenders available on upstream node (%i required)", + min_replication_connections); + } + + return config_ok; +} + /* * initialise_direct_clone() @@ -3126,7 +3482,13 @@ run_basebackup(t_node_info *node_record) (void) parse_conninfo_string(runtime_options.dbname, &conninfo, NULL, false); if (*runtime_options.replication_user) + { param_set(&conninfo, "user", runtime_options.replication_user); + } + else + { + param_set(&conninfo, "user", node_record->repluser); + } conninfo_str = param_list_to_string(&conninfo); @@ -3155,10 +3517,15 @@ run_basebackup(t_node_info *node_record) { appendPQExpBuffer(&params, " -U %s", runtime_options.replication_user); } + else if (strlen(node_record->repluser)) + { + appendPQExpBuffer(&params, " -U %s", node_record->repluser); + } else
if (strlen(runtime_options.username)) { appendPQExpBuffer(&params, " -U %s", runtime_options.username); } + } if (runtime_options.fast_checkpoint) { @@ -4414,7 +4781,7 @@ do_standby_help(void) printf(_(" --upstream-conninfo \"primary_conninfo\" value to write in recovery.conf\n" \ " when the intended upstream server does not yet exist\n")); printf(_(" -R, --remote-user=USERNAME database server username for SSH operations (default: \"%s\")\n"), runtime_options.username); - printf(_(" --replication-user username to set in \"primary_conninfo\" in recovery.conf\n")); + printf(_(" --replication-user user to make replication connections with (optional, not usually required)\n")); printf(_(" --without-barman do not use Barman even if configured\n")); puts(""); diff --git a/repmgr-client-global.h b/repmgr-client-global.h index 81ede1fb..fa807539 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -177,7 +177,7 @@ extern int check_server_version(PGconn *conn, char *server_type, bool exit_on_er extern bool create_repmgr_extension(PGconn *conn); extern int test_ssh_connection(char *host, char *remote_user); extern bool local_command(const char *command, PQExpBufferData *outputbuf); -extern bool check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error); + extern standy_clone_mode get_standby_clone_mode(void); extern int copy_remote_files(char *host, char *remote_user, char *remote_path, diff --git a/repmgr-client.c b/repmgr-client.c index ccfbd0b3..d944d78b 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -2036,354 +2036,6 @@ get_superuser_connection(PGconn **conn, PGconn **superuser_conn, PGconn **privil } -/* - * check_upstream_config() - * - * Perform sanity check on upstream server configuration before starting cloning - * process - * - * TODO: - * - check user is qualified to perform base backup - */ - -bool -check_upstream_config(PGconn *conn, int server_version_num, bool exit_on_error) -{ - int i; - bool config_ok = true; - char
*wal_error_message = NULL; - t_basebackup_options backup_options = T_BASEBACKUP_OPTIONS_INITIALIZER; - bool backup_options_ok = true; - ItemList backup_option_errors = { NULL, NULL }; - bool xlog_stream = true; - standy_clone_mode mode; - - /* - * Detecting the intended cloning mode - */ - mode = get_standby_clone_mode(); - - /* - * Parse `pg_basebackup_options`, if set, to detect whether --xlog-method - * has been set to something other than `stream` (i.e. `fetch`), as - * this will influence some checks - */ - - backup_options_ok = parse_pg_basebackup_options( - config_file_options.pg_basebackup_options, - &backup_options, server_version_num, - &backup_option_errors); - - if (backup_options_ok == false) - { - if (exit_on_error == true) - { - log_error(_("error(s) encountered parsing 'pg_basebackup_options'")); - print_error_list(&backup_option_errors, LOG_ERR); - log_hint(_("'pg_basebackup_options' is: '%s'"), - config_file_options.pg_basebackup_options); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - - if (strlen(backup_options.xlog_method) && strcmp(backup_options.xlog_method, "stream") != 0) - xlog_stream = false; - - /* Check that WAL level is set correctly */ - { - char *levels_pre96[] = { - "hot_standby", - "logical", - NULL, - }; - - /* - * Note that in 9.6+, "hot_standby" and "archive" are accepted as aliases - * for "replica", but current_setting() will of course always return "replica" - */ - char *levels_96plus[] = { - "replica", - "logical", - NULL, - }; - - char **levels; - int j = 0; - - if (server_version_num < 90600) - { - levels = (char **)levels_pre96; - wal_error_message = _("parameter 'wal_level' must be set to 'hot_standby' or 'logical'"); - } - else - { - levels = (char **)levels_96plus; - wal_error_message = _("parameter 'wal_level' must be set to 'replica' or 'logical'"); - } - - do - { - i = guc_set(conn, "wal_level", "=", levels[j]); - if (i) - { - break; - } - j++; - } while (levels[j] != NULL); - } - - if (i == 0 || i == -1) 
- { - if (i == 0) - { - log_error("%s", wal_error_message); - } - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - - if (config_file_options.use_replication_slots) - { - i = guc_set_typed(conn, "max_replication_slots", ">", - "0", "integer"); - if (i == 0 || i == -1) - { - if (i == 0) - { - log_error(_("parameter \"max_replication_slots\" must be set to at least 1 to enable replication slots")); - log_hint(_("\"max_replication_slots\" should be set to at least the number of expected standbys")); - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - } - } - /* - * physical replication slots not available or not requested - check if - * there are any circumstances where `wal_keep_segments` should be set - */ - else if (mode != barman) - { - bool check_wal_keep_segments = false; - - /* - * A non-zero `wal_keep_segments` value will almost certainly be required - * if pg_basebackup is being used with --xlog-method=fetch, - * *and* no restore command has been specified - */ - if (xlog_stream == false - && strcmp(config_file_options.restore_command, "") == 0) - { - check_wal_keep_segments = true; - } - - if (check_wal_keep_segments == true) - { - i = guc_set_typed(conn, "wal_keep_segments", ">", "0", "integer"); - - if (i == 0 || i == -1) - { - if (i == 0) - { - log_error(_("parameter \"wal_keep_segments\" on the upstream server must be be set to a non-zero value")); - log_hint(_("Choose a value sufficiently high enough to retain enough WAL " - "until the standby has been cloned and started.\n " - "Alternatively set up WAL archiving using e.g. 
PgBarman and configure " - "'restore_command' in repmgr.conf to fetch WALs from there.")); - - if (server_version_num >= 90400) - { - log_hint(_("In PostgreSQL 9.4 and later, replication slots can be used, which " - "do not require \"wal_keep_segments\" to be set " - "(set parameter \"use_replication_slots\" in repmgr.conf to enable)\n" - )); - } - } - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - } - } - - - /* - * If archive_mode is enabled, check that 'archive_command' is non empty - * (however it's not practical to check that it actually represents a valid - * command). - * - * From PostgreSQL 9.5, archive_mode can be one of 'off', 'on' or 'always' - * so for ease of backwards compatibility, rather than explicitly check for an - * enabled mode, check that it's not "off". - */ - - if (guc_set(conn, "archive_mode", "!=", "off")) - { - i = guc_set(conn, "archive_command", "!=", ""); - - if (i == 0 || i == -1) - { - if (i == 0) - log_error(_("parameter \"archive_command\" must be set to a valid command")); - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - } - - - /* - * Check that 'hot_standby' is on. This isn't strictly necessary - * for the primary server, however the assumption is that we'll be - * cloning standbys and thus copying the primary configuration; - * this way the standby will be correctly configured by default. 
- */ - - i = guc_set(conn, "hot_standby", "=", "on"); - if (i == 0 || i == -1) - { - if (i == 0) - { - log_error(_("parameter 'hot_standby' must be set to 'on'")); - } - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - - i = guc_set_typed(conn, "max_wal_senders", ">", "0", "integer"); - if (i == 0 || i == -1) - { - if (i == 0) - { - log_error(_("parameter \"max_wal_senders\" must be set to be at least 1")); - log_hint(_("\"max_wal_senders\" should be set to at least the number of expected standbys")); - } - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - - config_ok = false; - } - - /* - * If using pg_basebackup, ensure sufficient replication connections can be made. - * There's no guarantee they'll still be available by the time pg_basebackup - * is executed, but there's nothing we can do about that. - */ - if (mode == pg_basebackup) - { - - PGconn **connections; - int i; - int min_replication_connections = 1, - possible_replication_connections = 0; - - t_conninfo_param_list repl_conninfo; - - /* Make a copy of the connection parameter arrays, and append "replication" */ - - initialize_conninfo_params(&repl_conninfo, false); - - conn_to_param_list(conn, &repl_conninfo); - - param_set(&repl_conninfo, "replication", "1"); - - if (*runtime_options.replication_user) - param_set(&repl_conninfo, "user", runtime_options.replication_user); - - /* - * work out how many replication connections are required (1 or 2) - */ - - if (xlog_stream == true) - min_replication_connections += 1; - - log_verbose(LOG_NOTICE, "checking for available walsenders on upstream node (%i required)", - min_replication_connections); - - connections = pg_malloc0(sizeof(PGconn *) * min_replication_connections); - - /* Attempt to create the minimum number of required concurrent connections */ - for (i = 0; i < min_replication_connections; i++) - { - PGconn *replication_conn; - - replication_conn = 
establish_db_connection_by_params(&repl_conninfo, false); - - if (PQstatus(replication_conn) == CONNECTION_OK) - { - connections[i] = replication_conn; - possible_replication_connections++; - } - } - - /* Close previously created connections */ - for (i = 0; i < possible_replication_connections; i++) - { - PQfinish(connections[i]); - } - - pfree(connections); - free_conninfo_params(&repl_conninfo); - - if (possible_replication_connections < min_replication_connections) - { - config_ok = false; - - /* - * XXX at this point we could check current_setting('max_wal_senders) - COUNT(*) FROM pg_stat_replication; - * if >= min_replication_connections we could infer possible authentication error. - * - * Alternatively call PQconnectStart() and poll for presence/absence of CONNECTION_AUTH_OK ? - */ - log_error(_("unable to establish necessary replication connections")); - log_hint(_("increase \"max_wal_senders\" by at least %i"), - min_replication_connections - possible_replication_connections); - - if (exit_on_error == true) - { - PQfinish(conn); - exit(ERR_BAD_CONFIG); - } - } - - log_verbose(LOG_INFO, "sufficient walsenders available on upstream node (%i required)", - min_replication_connections); - } - - return config_ok; -} standy_clone_mode get_standby_clone_mode(void)