mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-23 07:06:30 +00:00
repmgr: catch possible corner case when checking node shutdown status
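It's conceivable that PQping returns "no response" while the shutdown hasn't quite completed. In that case, if the control data shows the database is still shutting down, report the node as SHUTTING_DOWN rather than as an unclean shutdown.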
This commit is contained in:
@@ -4692,6 +4692,8 @@ print_node_status(NodeStatus node_status)
|
||||
return "UNKNOWN";
|
||||
case NODE_STATUS_UP:
|
||||
return "UP";
|
||||
case NODE_STATUS_SHUTTING_DOWN:
|
||||
return "SHUTTING_DOWN";
|
||||
case NODE_STATUS_DOWN:
|
||||
return "DOWN";
|
||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
||||
|
||||
@@ -74,6 +74,7 @@ typedef enum
|
||||
{
|
||||
NODE_STATUS_UNKNOWN = -1,
|
||||
NODE_STATUS_UP,
|
||||
NODE_STATUS_SHUTTING_DOWN,
|
||||
NODE_STATUS_DOWN,
|
||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
||||
} NodeStatus;
|
||||
|
||||
@@ -496,15 +496,26 @@ _do_node_status_is_shutdown_cleanly(void)
|
||||
|
||||
db_state = get_db_state(config_file_options.data_directory);
|
||||
|
||||
|
||||
log_verbose(LOG_DEBUG, "db state now: %s", describe_db_state(db_state));
|
||||
|
||||
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
||||
{
|
||||
/*
|
||||
* node is not running, but pg_controldata says it is - unclean
|
||||
* shutdown
|
||||
*/
|
||||
|
||||
if (node_status != NODE_STATUS_UP)
|
||||
{
|
||||
node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
|
||||
if (db_state == DB_SHUTDOWNING)
|
||||
{
|
||||
node_status = NODE_STATUS_SHUTTING_DOWN;
|
||||
}
|
||||
/*
|
||||
* node is not running or shutting down, but pg_controldata says it is -
|
||||
* unclean shutdown
|
||||
*/
|
||||
else
|
||||
{
|
||||
node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,19 +536,24 @@ _do_node_status_is_shutdown_cleanly(void)
|
||||
node_status = NODE_STATUS_DOWN;
|
||||
}
|
||||
|
||||
log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status));
|
||||
|
||||
switch (node_status)
|
||||
{
|
||||
case NODE_STATUS_UP:
|
||||
appendPQExpBuffer(&output, "RUNNING");
|
||||
break;
|
||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
||||
appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
|
||||
case NODE_STATUS_SHUTTING_DOWN:
|
||||
appendPQExpBuffer(&output, "SHUTTING_DOWN");
|
||||
break;
|
||||
case NODE_STATUS_DOWN:
|
||||
appendPQExpBuffer(&output,
|
||||
"SHUTDOWN --last-checkpoint-lsn=%X/%X",
|
||||
format_lsn(checkPoint));
|
||||
break;
|
||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
||||
appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
|
||||
break;
|
||||
case NODE_STATUS_UNKNOWN:
|
||||
appendPQExpBuffer(&output, "UNKNOWN");
|
||||
break;
|
||||
|
||||
@@ -2701,6 +2701,11 @@ do_standby_switchover(void)
|
||||
}
|
||||
log_error(_("current primary did not shut down cleanly, continuing anyway"));
|
||||
shutdown_success = true;
|
||||
break;
|
||||
}
|
||||
else if (status == NODE_STATUS_SHUTTING_DOWN)
|
||||
{
|
||||
log_info(_("remote node is still shutting down"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5268,7 +5273,7 @@ parse_output_to_argv(const char *string, char ***argv_array)
|
||||
{
|
||||
int argv_len = strlen(cell->string) + 1;
|
||||
|
||||
local_argv_array[c] = pg_malloc0(argv_len);
|
||||
local_argv_array[c] = (char *)pg_malloc0(argv_len);
|
||||
|
||||
strncpy(local_argv_array[c], cell->string, argv_len);
|
||||
|
||||
@@ -5293,11 +5298,11 @@ free_parsed_argv(char ***argv_array)
|
||||
|
||||
while (local_argv_array[i] != NULL)
|
||||
{
|
||||
pfree(local_argv_array[i]);
|
||||
pfree((char *)local_argv_array[i]);
|
||||
i++;
|
||||
}
|
||||
|
||||
pfree(local_argv_array);
|
||||
pfree((char **)local_argv_array);
|
||||
*argv_array = NULL;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user