From 5d5704411800ebe9eae8fe7132343dca9b58ea49 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 10 Jan 2018 12:21:04 +0900 Subject: [PATCH] repmgr: during switchover, correctly detect unclean shutdown status --- dbutils.c | 25 ++++++++++++++++++++++++- dbutils.h | 3 +++ repmgr-action-standby.c | 32 +++++++++++++++++++++++--------- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/dbutils.c b/dbutils.c index ef35bc3e..6b18d1e5 100644 --- a/dbutils.c +++ b/dbutils.c @@ -4657,12 +4657,13 @@ bdr_node_set_repmgr_set(PGconn *conn, const char *node_name) " SELECT bdr.connection_set_replication_sets( " " ARRAY( " " SELECT repset::TEXT " - " FROM UNNEST(bdr.connection_get_replication_sets('node1')) AS repset " + " FROM UNNEST(bdr.connection_get_replication_sets('%s')) AS repset " " UNION " " SELECT 'repmgr'::TEXT " " ), " " '%s' " " ) ", + node_name, node_name); res = PQexec(conn, query.data); @@ -4677,3 +4678,25 @@ bdr_node_set_repmgr_set(PGconn *conn, const char *node_name) return success; } + + + +/* miscellaneous debugging functions */ + +const char * +print_node_status(NodeStatus node_status) +{ + switch (node_status) + { + case NODE_STATUS_UNKNOWN: + return "UNKNOWN"; + case NODE_STATUS_UP: + return "UP"; + case NODE_STATUS_DOWN: + return "DOWN"; + case NODE_STATUS_UNCLEAN_SHUTDOWN: + return "UNCLEAN_SHUTDOWN"; + } + + return "UNIDENTIFIED_STATUS"; +} diff --git a/dbutils.h b/dbutils.h index 45e2bb3c..818917f7 100644 --- a/dbutils.h +++ b/dbutils.h @@ -509,4 +509,7 @@ void unset_bdr_failover_handler(PGconn *conn); bool bdr_node_has_repmgr_set(PGconn *conn, const char *node_name); bool bdr_node_set_repmgr_set(PGconn *conn, const char *node_name); +/* miscellaneous debugging functions */ +const char *print_node_status(NodeStatus node_status); + #endif /* _REPMGR_DBUTILS_H_ */ diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index a1710e2f..32bfceec 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2648,7 +2648,6 @@ do_standby_switchover(void) i + 1, config_file_options.reconnect_attempts); ping_res = PQping(remote_conninfo); - /* database server could not be contacted */ if (ping_res == PQPING_NO_RESPONSE || ping_res == PQPING_NO_ATTEMPT) { @@ -2668,8 +2667,7 @@ do_standby_switchover(void) initPQExpBuffer(&command_output); - command_success = remote_command( - remote_host, + command_success = remote_command(remote_host, runtime_options.remote_user, remote_command_str.data, &command_output); @@ -2680,6 +2678,8 @@ do_standby_switchover(void) { NodeStatus status = parse_node_status_is_shutdown_cleanly(command_output.data, &remote_last_checkpoint_lsn); + log_verbose(LOG_DEBUG, "remote node status is: %s", print_node_status(status)); + if (status == NODE_STATUS_DOWN && remote_last_checkpoint_lsn != InvalidXLogRecPtr) { shutdown_success = true; @@ -2689,6 +2689,19 @@ do_standby_switchover(void) break; } + /* remote node did not shut down cleanly */ + else if (status == NODE_STATUS_UNCLEAN_SHUTDOWN) + { + if (!runtime_options.force) + { + log_error(_("current primary did not shut down cleanly, aborting")); + log_hint(_("use -F/--force to promote current standby")); + termPQExpBuffer(&command_output); + exit(ERR_SWITCHOVER_FAIL); + } + log_error(_("current primary did not shut down cleanly, continuing anyway")); + shutdown_success = true; + } } termPQExpBuffer(&command_output); @@ -2707,7 +2720,7 @@ do_standby_switchover(void) /* this is unlikely to happen, but check and handle gracefully anyway */ if (PQstatus(local_conn) != CONNECTION_OK) { - log_warning(_("connection to local node lost, reconnecting..")); + log_warning(_("connection to local node lost, reconnecting...")); local_conn = establish_db_connection(config_file_options.conninfo, false); if (PQstatus(local_conn) != CONNECTION_OK) @@ -5054,7 +5067,7 @@ parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr int optindex = 0; /* We're only interested in these options */ - static struct option long_options[] = + struct option node_status_options[] = { {"last-checkpoint-lsn", required_argument, NULL, 'L'}, {"state", required_argument, NULL, 'S'}, @@ -5076,7 +5089,7 @@ parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr /* Prevent getopt from emitting errors */ opterr = 0; - while ((c = getopt_long(argc_item, argv_array, "L:S:", long_options, + while ((c = getopt_long(argc_item, argv_array, "L:S:", node_status_options, &optindex)) != -1) { switch (c) @@ -5126,7 +5139,7 @@ parse_node_check_archiver(const char *node_check_output, int *files, int *thresh int optindex = 0; /* We're only interested in these options */ - static struct option long_options[] = + struct option node_check_options[] = { {"status", required_argument, NULL, 'S'}, {"files", required_argument, NULL, 'f'}, @@ -5152,7 +5165,7 @@ parse_node_check_archiver(const char *node_check_output, int *files, int *thresh /* Prevent getopt from emitting errors */ opterr = 0; - while ((c = getopt_long(argc_item, argv_array, "f:S:t:", long_options, + while ((c = getopt_long(argc_item, argv_array, "f:S:t:", node_check_options, &optindex)) != -1) { switch (c) @@ -5228,7 +5241,7 @@ parse_output_to_argv(const char *string, char ***argv_array) /* Extract arguments into a list and keep a count of the total */ while ((argv_item = strtok(options_string_ptr, " ")) != NULL) { - item_list_append(&option_argv, argv_item); + item_list_append(&option_argv, trim(argv_item)); argc_item++; @@ -5285,6 +5298,7 @@ free_parsed_argv(char ***argv_array) } pfree(local_argv_array); + *argv_array = NULL; }