diff --git a/dbutils.h b/dbutils.h index 15b05b3d..2c93e60c 100644 --- a/dbutils.h +++ b/dbutils.h @@ -79,6 +79,14 @@ typedef enum NODE_STATUS_UNCLEAN_SHUTDOWN } NodeStatus; +typedef enum +{ + CONN_UNKNOWN = -1, + CONN_OK, + CONN_BAD, + CONN_ERROR +} ConnectionStatus; + typedef enum { SLOT_UNKNOWN = -1, diff --git a/repmgr-action-node.c b/repmgr-action-node.c index c3002672..329872fa 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -41,6 +41,7 @@ static void _do_node_status_is_shutdown_cleanly(void); static void _do_node_archive_config(void); static void _do_node_restore_config(void); +static void do_node_check_replication_connection(void); static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output); static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, CheckStatusList *list_output); static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output); @@ -463,8 +464,7 @@ _do_node_status_is_shutdown_cleanly(void) initPQExpBuffer(&output); - appendPQExpBuffer( - &output, + appendPQExpBuffer(&output, "--state="); /* sanity-check we're dealing with a PostgreSQL directory */ @@ -580,6 +580,11 @@ do_node_check(void) exit(return_code); } + if (runtime_options.replication_connection == true) + { + do_node_check_replication_connection(); + exit(SUCCESS); + } if (strlen(config_file_options.conninfo)) conn = establish_db_connection(config_file_options.conninfo, true); @@ -883,6 +888,67 @@ do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, Check } +static void +do_node_check_replication_connection(void) +{ + PGconn *local_conn = NULL; + PGconn *repl_conn = NULL; + t_node_info node_record = T_NODE_INFO_INITIALIZER; + RecordStatus record_status = RECORD_NOT_FOUND; + t_conninfo_param_list remote_conninfo; + PQExpBufferData output; + + + initPQExpBuffer(&output); + appendPQExpBuffer(&output, + "--connection="); + + if (runtime_options.remote_node_id == UNKNOWN_NODE_ID) + { + appendPQExpBuffer(&output, "UNKNOWN"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + local_conn = establish_db_connection(config_file_options.conninfo, true); + + record_status = get_node_record(local_conn, runtime_options.remote_node_id, &node_record); + PQfinish(local_conn); + + if (record_status != RECORD_FOUND) + { + appendPQExpBuffer(&output, "UNKNOWN"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + initialize_conninfo_params(&remote_conninfo, false); + parse_conninfo_string(node_record.conninfo, &remote_conninfo, NULL, false); + + param_set(&remote_conninfo, "replication", "1"); + param_set(&remote_conninfo, "user", node_record.repluser); + + repl_conn = establish_db_connection_by_params(&remote_conninfo, false); + + if (PQstatus(repl_conn) != CONNECTION_OK) + { + appendPQExpBuffer(&output, "BAD"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + return; + } + + PQfinish(repl_conn); + + appendPQExpBuffer(&output, "OK"); + printf("%s\n", output.data); + termPQExpBuffer(&output); + + return; +} + static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output) { diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 80f7b30d..7733128b 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -101,6 +101,7 @@ static bool write_recovery_file_line(FILE *recovery_file, char *recovery_file_pa static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint); static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold); +static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output); /* * STANDBY CLONE @@ -2405,8 +2406,7 @@ do_standby_switchover(void) appendPQExpBuffer(&remote_command_str, "--version 2>/dev/null && echo \"1\" || echo \"0\""); initPQExpBuffer(&command_output); - command_success = remote_command( - remote_host, + command_success = remote_command(remote_host, runtime_options.remote_user, remote_command_str.data, &command_output); @@ -2426,20 +2426,17 @@ do_standby_switchover(void) termPQExpBuffer(&command_output); initPQExpBuffer(&hint); - appendPQExpBuffer( - &hint, + appendPQExpBuffer(&hint, _("check \"pg_bindir\" is set to the correct path in \"repmgr.conf\"; current value: ")); if (strlen(config_file_options.pg_bindir)) { - appendPQExpBuffer( - &hint, + appendPQExpBuffer(&hint, "\"%s\"", config_file_options.pg_bindir); } else { - appendPQExpBuffer( - &hint, + appendPQExpBuffer(&hint, "(not set)"); } @@ -2460,6 +2457,49 @@ do_standby_switchover(void) } termPQExpBuffer(&command_output); + /* check demotion candidate can make replication connection to promotion candidate */ + { + initPQExpBuffer(&remote_command_str); + make_remote_repmgr_path(&remote_command_str, &remote_node_record); + appendPQExpBuffer(&remote_command_str, + "node check --remote-node-id=%i --replication-connection", + local_node_record.node_id); + + initPQExpBuffer(&command_output); + + command_success = remote_command(remote_host, + runtime_options.remote_user, + remote_command_str.data, + &command_output); + + termPQExpBuffer(&remote_command_str); + + if (command_success == true) + { + ConnectionStatus conn_status = parse_remote_node_replication_connection(command_output.data); + + switch(conn_status) + { + case CONN_OK: + if (runtime_options.dry_run == true) + { + log_info(_("demotion candidate is able to make replication connection to promotion candidate")); + } + break; + case CONN_BAD: + log_error(_("demotion candidate is unable to make replication connection to promotion candidate")); + exit(ERR_BAD_CONFIG); + break; + default: + log_error(_("unable to deterimine whether candidate is able to make replication connection to promotion candidate")); + exit(ERR_BAD_CONFIG); + break; + } + + termPQExpBuffer(&command_output); + } + } + /* check archive/replication status */ { int lag_seconds = 0; @@ -5479,6 +5519,69 @@ parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr } +static ConnectionStatus +parse_remote_node_replication_connection(const char *node_check_output) +{ + ConnectionStatus conn_status = CONN_UNKNOWN; + + int c = 0, + argc_item = 0; + char **argv_array = NULL; + int optindex = 0; + + /* We're only interested in these options */ + struct option node_check_options[] = + { + {"connection", required_argument, NULL, 'c'}, + {NULL, 0, NULL, 0} + }; + + /* Don't attempt to tokenise an empty string */ + if (!strlen(node_check_output)) + { + return CONN_UNKNOWN; + } + + argc_item = parse_output_to_argv(node_check_output, &argv_array); + + /* Reset getopt's optind variable */ + optind = 0; + + /* Prevent getopt from emitting errors */ + opterr = 0; + + while ((c = getopt_long(argc_item, argv_array, "L:S:", node_check_options, + &optindex)) != -1) + { + switch (c) + { + + /* --connection */ + case 'c': + { + if (strncmp(optarg, "OK", MAXLEN) == 0) + { + conn_status = CONN_OK; + } + else if (strncmp(optarg, "BAD", MAXLEN) == 0) + { + conn_status = CONN_BAD; + } + else if (strncmp(optarg, "UNKNOWN", MAXLEN) == 0) + { + conn_status = CONN_UNKNOWN; + } + } + break; + } + } + + free_parsed_argv(&argv_array); + + return conn_status; +} + + static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold) { diff --git a/repmgr-client-global.h b/repmgr-client-global.h index c254a6ec..c5810f64 100644 --- a/repmgr-client-global.h +++ b/repmgr-client-global.h @@ -68,6 +68,7 @@ typedef struct int node_id; char node_name[MAXLEN]; char data_dir[MAXPGPATH]; + int remote_node_id; /* "standby clone" options */ bool copy_external_config_files; @@ -103,6 +104,7 @@ typedef struct bool role; bool slots; bool has_passfile; + bool replication_connection; /* "node join" options */ char config_files[MAXLEN]; @@ -139,8 +141,8 @@ typedef struct "", "", "", "", \ /* other connection options */ \ "", "", \ - /* node options */ \ - UNKNOWN_NODE_ID, "", "", \ + /* general node options */ \ + UNKNOWN_NODE_ID, "", "", UNKNOWN_NODE_ID, \ /* "standby clone" options */ \ false, CONFIG_FILE_SAMEPATH, false, false, false, "", "", "", \ false, \ @@ -153,7 +155,7 @@ typedef struct /* "node status" options */ \ false, \ /* "node check" options */ \ - false, false, false, false, false, false, \ + false, false, false, false, false, false, false, \ /* "node join" options */ \ "", \ /* "node service" options */ \ diff --git a/repmgr-client.c b/repmgr-client.c index 50a28b08..e6b5304c 100644 --- a/repmgr-client.c +++ b/repmgr-client.c @@ -60,7 +60,6 @@ #include "repmgr-action-witness.h" #include "repmgr-action-bdr.h" #include "repmgr-action-node.h" - #include "repmgr-action-cluster.h" #include /* for PG_TEMP_FILE_PREFIX */ @@ -329,6 +328,11 @@ main(int argc, char **argv) strncpy(runtime_options.node_name, optarg, MAXLEN); break; + /* --remote-node-id */ + case OPT_REMOTE_NODE_ID: + runtime_options.remote_node_id = repmgr_atoi(optarg, "--remote-node-id", &cli_errors, false); + break; + /* * standby options * --------------- */ @@ -455,6 +459,10 @@ main(int argc, char **argv) runtime_options.has_passfile = true; break; + case OPT_REPL_CONN: + runtime_options.replication_connection = true; + break; + /*-------------------- * "node rejoin" options *-------------------- diff --git a/repmgr-client.h b/repmgr-client.h index cc901ee3..bad1ec5a 100644 --- a/repmgr-client.h +++ b/repmgr-client.h @@ -83,6 +83,8 @@ #define OPT_CONFIG_ARCHIVE_DIR 1034 #define OPT_HAS_PASSFILE 1035 #define OPT_WAIT_START 1036 +#define OPT_REPL_CONN 1037 +#define OPT_REMOTE_NODE_ID 1038 /* deprecated since 3.3 */ #define OPT_DATA_DIR 999 @@ -115,6 +117,7 @@ static struct option long_options[] = {"pgdata", required_argument, NULL, 'D'}, {"node-id", required_argument, NULL, OPT_NODE_ID}, {"node-name", required_argument, NULL, OPT_NODE_NAME}, + {"remote-node-id", required_argument, NULL, OPT_REMOTE_NODE_ID}, /* logging options */ {"log-level", required_argument, NULL, 'L'}, @@ -158,6 +161,7 @@ static struct option long_options[] = {"role", no_argument, NULL, OPT_ROLE}, {"slots", no_argument, NULL, OPT_SLOTS}, {"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE}, + {"replication-connection", no_argument, NULL, OPT_REPL_CONN}, /* "node rejoin" options */ {"config-files", required_argument, NULL, OPT_CONFIG_FILES},