From faffb2a6e7195b811c8e6f13f4d5fcf1d134f36d Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 10 Jan 2018 14:56:00 +0900 Subject: [PATCH] repmgr: catch possible corner case when checking node shutdown status It's conceivable that PQping is returning "no response" but the shutdown hasn't quite completed. --- dbutils.c | 2 ++ dbutils.h | 1 + repmgr-action-node.c | 30 +++++++++++++++++++++++------- repmgr-action-standby.c | 11 ++++++++--- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/dbutils.c b/dbutils.c index 6b18d1e5..df7ad746 100644 --- a/dbutils.c +++ b/dbutils.c @@ -4692,6 +4692,8 @@ print_node_status(NodeStatus node_status) return "UNKNOWN"; case NODE_STATUS_UP: return "UP"; + case NODE_STATUS_SHUTTING_DOWN: + return "SHUTTING_DOWN"; case NODE_STATUS_DOWN: return "DOWN"; case NODE_STATUS_UNCLEAN_SHUTDOWN: diff --git a/dbutils.h b/dbutils.h index 818917f7..2dd2ba84 100644 --- a/dbutils.h +++ b/dbutils.h @@ -74,6 +74,7 @@ typedef enum { NODE_STATUS_UNKNOWN = -1, NODE_STATUS_UP, + NODE_STATUS_SHUTTING_DOWN, NODE_STATUS_DOWN, NODE_STATUS_UNCLEAN_SHUTDOWN } NodeStatus; diff --git a/repmgr-action-node.c b/repmgr-action-node.c index bd8b252b..ce50d11f 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -496,15 +496,26 @@ _do_node_status_is_shutdown_cleanly(void) db_state = get_db_state(config_file_options.data_directory); + + log_verbose(LOG_DEBUG, "db state now: %s", describe_db_state(db_state)); + if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY) { - /* - * node is not running, but pg_controldata says it is - unclean - * shutdown - */ + if (node_status != NODE_STATUS_UP) { - node_status = NODE_STATUS_UNCLEAN_SHUTDOWN; + if (db_state == DB_SHUTDOWNING) + { + node_status = NODE_STATUS_SHUTTING_DOWN; + } + /* + * node is not running or shutting down, but pg_controldata says it is - + * unclean shutdown + */ + else + { + node_status = NODE_STATUS_UNCLEAN_SHUTDOWN; + } } } @@ -525,19 +536,24 @@ _do_node_status_is_shutdown_cleanly(void) node_status = NODE_STATUS_DOWN; } + log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status)); + switch (node_status) { case NODE_STATUS_UP: appendPQExpBuffer(&output, "RUNNING"); break; - case NODE_STATUS_UNCLEAN_SHUTDOWN: - appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN"); + case NODE_STATUS_SHUTTING_DOWN: + appendPQExpBuffer(&output, "SHUTTING_DOWN"); break; case NODE_STATUS_DOWN: appendPQExpBuffer(&output, "SHUTDOWN --last-checkpoint-lsn=%X/%X", format_lsn(checkPoint)); break; + case NODE_STATUS_UNCLEAN_SHUTDOWN: + appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN"); + break; case NODE_STATUS_UNKNOWN: appendPQExpBuffer(&output, "UNKNOWN"); break; diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 32bfceec..ad12b536 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -2701,6 +2701,11 @@ do_standby_switchover(void) } log_error(_("current primary did not shut down cleanly, continuing anyway")); shutdown_success = true; + break; + } + else if (status == NODE_STATUS_SHUTTING_DOWN) + { + log_info(_("remote node is still shutting down")); } } @@ -5268,7 +5273,7 @@ parse_output_to_argv(const char *string, char ***argv_array) { int argv_len = strlen(cell->string) + 1; - local_argv_array[c] = pg_malloc0(argv_len); + local_argv_array[c] = (char *)pg_malloc0(argv_len); strncpy(local_argv_array[c], cell->string, argv_len); @@ -5293,11 +5298,11 @@ free_parsed_argv(char ***argv_array) while (local_argv_array[i] != NULL) { - pfree(local_argv_array[i]); + pfree((char *)local_argv_array[i]); i++; } - pfree(local_argv_array); + pfree((char **)local_argv_array); *argv_array = NULL; }