standby switchover: improve directory check failure handling

It's possible that the remote data directory check will fail if e.g.
connection configuration is not consistent across all nodes. This
modification ensures a database connection error is reported, rather
than a spurious issue with the data directory configuration.
This commit is contained in:
Ian Barwick
2020-04-15 10:39:30 +09:00
parent 78f89a4d47
commit 32dde4eaaf
3 changed files with 84 additions and 18 deletions

View File

@@ -766,7 +766,7 @@ do_node_check(void)
{
if (runtime_options.output_mode == OM_OPTFORMAT)
{
exit_optformat_error("CONNINFO_PARSE_ERROR",
exit_optformat_error("CONNINFO_PARSE",
ERR_BAD_CONFIG);
}
@@ -804,7 +804,7 @@ do_node_check(void)
*/
if (PQstatus(conn) != CONNECTION_OK)
{
exit_optformat_error("CONNECTION_ERROR",
exit_optformat_error("DB_CONNECTION",
ERR_DB_CONN);
}

View File

@@ -129,10 +129,12 @@ static bool check_free_slots(t_node_info *local_node_record, SiblingNodeStats *s
static void sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes, SiblingNodeStats *sibling_nodes_stats);
static t_remote_error_type parse_remote_error(const char *error);
static NodeStatus parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint);
static CheckStatus parse_node_check_archiver(const char *node_check_output, int *files, int *threshold);
static ConnectionStatus parse_remote_node_replication_connection(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output);
static bool parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error);
static bool parse_replication_config_owner(const char *node_check_output);
@@ -4049,23 +4051,58 @@ do_standby_switchover(void)
}
/* check remote repmgr has the data directory correctly configured */
if (parse_data_directory_config(command_output.data) == false)
{
log_error(_("\"data_directory\" parameter in \"repmgr.conf\" on \"%s\" (ID: %i) is incorrectly configured"),
remote_node_record.node_name,
remote_node_record.node_id);
t_remote_error_type remote_error = REMOTE_ERROR_NONE;
log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" (ID: %i) to diagnose the issue"),
remote_node_record.node_name,
remote_node_record.node_id);
PQfinish(remote_conn);
PQfinish(local_conn);
if (parse_data_directory_config(command_output.data, &remote_error) == false)
{
if (remote_error != REMOTE_ERROR_NONE)
{
log_error(_("unable to run data directory check on node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
termPQExpBuffer(&command_output);
if (remote_error == REMOTE_ERROR_DB_CONNECTION)
{
/* can happen if the connection configuration is not consistent across nodes */
log_detail(_("an error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
}
else if (remote_error == REMOTE_ERROR_CONNINFO_PARSE)
{
/* highly unlikely */
log_detail(_("an error was encountered when parsing the \"conninfo\" paremeter in \"rempgr.conf\" on node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
}
else
{
log_detail(_("an unknown error was encountered when attempting to connect to PostgreSQL on node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
}
}
else
{
log_error(_("\"data_directory\" parameter in \"repmgr.conf\" on \"%s\" (ID: %i) is incorrectly configured"),
remote_node_record.node_name,
remote_node_record.node_id);
exit(ERR_BAD_CONFIG);
log_hint(_("execute \"repmgr node check --data-directory-config\" on \"%s\" (ID: %i) to diagnose the issue"),
remote_node_record.node_name,
remote_node_record.node_id);
}
PQfinish(remote_conn);
PQfinish(local_conn);
termPQExpBuffer(&command_output);
exit(ERR_BAD_CONFIG);
}
}
termPQExpBuffer(&command_output);
@@ -8192,6 +8229,22 @@ sibling_nodes_follow(t_node_info *local_node_record, NodeInfoList *sibling_nodes
/*
 * parse_remote_error()
 *
 * Map a textual error token (as supplied to an "--error" option parsed
 * from a remote command's output) to its t_remote_error_type value.
 *
 * The comparison is case-insensitive. An empty token, or one which
 * matches none of the known tokens, yields REMOTE_ERROR_UNKNOWN.
 *
 * "error" must be a valid NUL-terminated string.
 */
static t_remote_error_type
parse_remote_error(const char *error)
{
	t_remote_error_type remote_error = REMOTE_ERROR_UNKNOWN;

	/* only attempt to match a token if there is something to match */
	if (error[0] != '\0')
	{
		if (strcasecmp(error, "DB_CONNECTION") == 0)
		{
			remote_error = REMOTE_ERROR_DB_CONNECTION;
		}
		else if (strcasecmp(error, "CONNINFO_PARSE") == 0)
		{
			remote_error = REMOTE_ERROR_CONNINFO_PARSE;
		}
	}

	return remote_error;
}
static NodeStatus
parse_node_status_is_shutdown_cleanly(const char *node_status_output, XLogRecPtr *checkPoint)
{
@@ -8412,7 +8465,7 @@ parse_node_check_archiver(const char *node_check_output, int *files, int *thresh
}
static bool
parse_data_directory_config(const char *node_check_output)
parse_data_directory_config(const char *node_check_output, t_remote_error_type *remote_error)
{
bool config_ok = true;
@@ -8425,6 +8478,7 @@ parse_data_directory_config(const char *node_check_output)
struct option node_check_options[] =
{
{"configured-data-directory", required_argument, NULL, 'C'},
{"error", required_argument, NULL, 'E'},
{NULL, 0, NULL, 0}
};
@@ -8442,7 +8496,7 @@ parse_data_directory_config(const char *node_check_output)
/* Prevent getopt from emitting errors */
opterr = 0;
while ((c = getopt_long(argc_item, argv_array, "C:", node_check_options,
while ((c = getopt_long(argc_item, argv_array, "C:E:", node_check_options,
&optindex)) != -1)
{
switch (c)
@@ -8455,9 +8509,14 @@ parse_data_directory_config(const char *node_check_output)
config_ok = false;
}
break;
case 'E':
{
*remote_error = parse_remote_error(optarg);
config_ok = false;
}
break;
}
}
free_parsed_argv(&argv_array);
return config_ok;

View File

@@ -221,6 +221,13 @@ typedef enum
JOIN_FAIL_NO_REPLICATION
} standy_join_status;
/*
 * Classifies an error reported back by a command executed on a remote node.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the compiler would assign implicitly following REMOTE_ERROR_UNKNOWN = -1.
 */
typedef enum
{
	/* an error was reported, but its type was empty or not recognised */
	REMOTE_ERROR_UNKNOWN = -1,
	/* no error reported */
	REMOTE_ERROR_NONE = 0,
	/* corresponds to the "DB_CONNECTION" error token */
	REMOTE_ERROR_DB_CONNECTION = 1,
	/* corresponds to the "CONNINFO_PARSE" error token */
	REMOTE_ERROR_CONNINFO_PARSE = 2
} t_remote_error_type;
typedef struct ColHeader
{