mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-26 16:46:28 +00:00
repmgr: catch possible corner case when checking node shutdown status
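It's conceivable that PQping() reports "no response" while the shutdown hasn't quite completed. If pg_controldata still shows the database state as shutting down in that situation, the node is now reported as SHUTTING_DOWN rather than UNCLEAN_SHUTDOWN.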
This commit is contained in:
@@ -4692,6 +4692,8 @@ print_node_status(NodeStatus node_status)
|
|||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
case NODE_STATUS_UP:
|
case NODE_STATUS_UP:
|
||||||
return "UP";
|
return "UP";
|
||||||
|
case NODE_STATUS_SHUTTING_DOWN:
|
||||||
|
return "SHUTTING_DOWN";
|
||||||
case NODE_STATUS_DOWN:
|
case NODE_STATUS_DOWN:
|
||||||
return "DOWN";
|
return "DOWN";
|
||||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ typedef enum
|
|||||||
{
|
{
|
||||||
NODE_STATUS_UNKNOWN = -1,
|
NODE_STATUS_UNKNOWN = -1,
|
||||||
NODE_STATUS_UP,
|
NODE_STATUS_UP,
|
||||||
|
NODE_STATUS_SHUTTING_DOWN,
|
||||||
NODE_STATUS_DOWN,
|
NODE_STATUS_DOWN,
|
||||||
NODE_STATUS_UNCLEAN_SHUTDOWN
|
NODE_STATUS_UNCLEAN_SHUTDOWN
|
||||||
} NodeStatus;
|
} NodeStatus;
|
||||||
|
|||||||
@@ -496,15 +496,26 @@ _do_node_status_is_shutdown_cleanly(void)
|
|||||||
|
|
||||||
db_state = get_db_state(config_file_options.data_directory);
|
db_state = get_db_state(config_file_options.data_directory);
|
||||||
|
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "db state now: %s", describe_db_state(db_state));
|
||||||
|
|
||||||
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* node is not running, but pg_controldata says it is - unclean
|
|
||||||
* shutdown
|
|
||||||
*/
|
|
||||||
if (node_status != NODE_STATUS_UP)
|
if (node_status != NODE_STATUS_UP)
|
||||||
{
|
{
|
||||||
node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
|
if (db_state == DB_SHUTDOWNING)
|
||||||
|
{
|
||||||
|
node_status = NODE_STATUS_SHUTTING_DOWN;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* node is not running or shutting down, but pg_controldata says it is -
|
||||||
|
* unclean shutdown
|
||||||
|
*/
|
||||||
|
else
|
||||||
|
{
|
||||||
|
node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -525,19 +536,24 @@ _do_node_status_is_shutdown_cleanly(void)
|
|||||||
node_status = NODE_STATUS_DOWN;
|
node_status = NODE_STATUS_DOWN;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status));
|
||||||
|
|
||||||
switch (node_status)
|
switch (node_status)
|
||||||
{
|
{
|
||||||
case NODE_STATUS_UP:
|
case NODE_STATUS_UP:
|
||||||
appendPQExpBuffer(&output, "RUNNING");
|
appendPQExpBuffer(&output, "RUNNING");
|
||||||
break;
|
break;
|
||||||
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
case NODE_STATUS_SHUTTING_DOWN:
|
||||||
appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
|
appendPQExpBuffer(&output, "SHUTTING_DOWN");
|
||||||
break;
|
break;
|
||||||
case NODE_STATUS_DOWN:
|
case NODE_STATUS_DOWN:
|
||||||
appendPQExpBuffer(&output,
|
appendPQExpBuffer(&output,
|
||||||
"SHUTDOWN --last-checkpoint-lsn=%X/%X",
|
"SHUTDOWN --last-checkpoint-lsn=%X/%X",
|
||||||
format_lsn(checkPoint));
|
format_lsn(checkPoint));
|
||||||
break;
|
break;
|
||||||
|
case NODE_STATUS_UNCLEAN_SHUTDOWN:
|
||||||
|
appendPQExpBuffer(&output, "UNCLEAN_SHUTDOWN");
|
||||||
|
break;
|
||||||
case NODE_STATUS_UNKNOWN:
|
case NODE_STATUS_UNKNOWN:
|
||||||
appendPQExpBuffer(&output, "UNKNOWN");
|
appendPQExpBuffer(&output, "UNKNOWN");
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -2701,6 +2701,11 @@ do_standby_switchover(void)
|
|||||||
}
|
}
|
||||||
log_error(_("current primary did not shut down cleanly, continuing anyway"));
|
log_error(_("current primary did not shut down cleanly, continuing anyway"));
|
||||||
shutdown_success = true;
|
shutdown_success = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (status == NODE_STATUS_SHUTTING_DOWN)
|
||||||
|
{
|
||||||
|
log_info(_("remote node is still shutting down"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -5268,7 +5273,7 @@ parse_output_to_argv(const char *string, char ***argv_array)
|
|||||||
{
|
{
|
||||||
int argv_len = strlen(cell->string) + 1;
|
int argv_len = strlen(cell->string) + 1;
|
||||||
|
|
||||||
local_argv_array[c] = pg_malloc0(argv_len);
|
local_argv_array[c] = (char *)pg_malloc0(argv_len);
|
||||||
|
|
||||||
strncpy(local_argv_array[c], cell->string, argv_len);
|
strncpy(local_argv_array[c], cell->string, argv_len);
|
||||||
|
|
||||||
@@ -5293,11 +5298,11 @@ free_parsed_argv(char ***argv_array)
|
|||||||
|
|
||||||
while (local_argv_array[i] != NULL)
|
while (local_argv_array[i] != NULL)
|
||||||
{
|
{
|
||||||
pfree(local_argv_array[i]);
|
pfree((char *)local_argv_array[i]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfree(local_argv_array);
|
pfree((char **)local_argv_array);
|
||||||
*argv_array = NULL;
|
*argv_array = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user