"standby switchover": improve logging

Also, there is no need to disconnect from and reconnect to the local node while it promotes.
This commit is contained in:
Ian Barwick
2017-09-05 10:26:27 +09:00
parent d82e936556
commit 5b5b456ecb
2 changed files with 25 additions and 16 deletions

View File

@@ -1424,12 +1424,18 @@ do_node_service(void)
return; return;
} }
log_notice(_("executing server command \"%s\""), command); /*
* log level is "DETAIL" here as this command is intended to be executed
* by another repmgr process (e.g. during standby switchover); that repmgr
* should emit a "NOTICE" about the intent of the command.
*/
log_detail(_("executing server command \"%s\""), command);
initPQExpBuffer(&output); initPQExpBuffer(&output);
if (local_command(command, &output) == false) if (local_command(command, &output) == false)
{ {
termPQExpBuffer(&output);
exit(ERR_LOCAL_COMMAND); exit(ERR_LOCAL_COMMAND);
} }

View File

@@ -1403,7 +1403,6 @@ _do_standby_promote_internal(PGconn *conn, const char *data_dir)
details.data); details.data);
termPQExpBuffer(&details); termPQExpBuffer(&details);
PQfinish(conn);
return; return;
} }
@@ -1607,7 +1606,6 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
{ {
t_node_info local_node_record = T_NODE_INFO_INITIALIZER; t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
int original_upstream_node_id = UNKNOWN_NODE_ID; int original_upstream_node_id = UNKNOWN_NODE_ID;
int r;
RecordStatus record_status = RECORD_NOT_FOUND; RecordStatus record_status = RECORD_NOT_FOUND;
char *errmsg = NULL; char *errmsg = NULL;
@@ -1718,6 +1716,7 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor
char server_command[MAXLEN] = ""; char server_command[MAXLEN] = "";
bool server_up = is_server_available(config_file_options.conninfo); bool server_up = is_server_available(config_file_options.conninfo);
char *action = NULL; char *action = NULL;
int r;
if (server_up == true) if (server_up == true)
{ {
@@ -2384,7 +2383,7 @@ do_standby_switchover(void)
} }
} }
} }
PQfinish(local_conn); //PQfinish(local_conn);
/* /*
@@ -2411,7 +2410,6 @@ do_standby_switchover(void)
make_remote_repmgr_path(&remote_command_str, &remote_node_record); make_remote_repmgr_path(&remote_command_str, &remote_node_record);
if (runtime_options.dry_run == true) if (runtime_options.dry_run == true)
{ {
appendPQExpBuffer(&remote_command_str, appendPQExpBuffer(&remote_command_str,
@@ -2420,6 +2418,9 @@ do_standby_switchover(void)
} }
else else
{ {
log_notice(_("stopping current primary node \"%s\" (ID: %i)"),
remote_node_record.node_name,
remote_node_record.node_id);
appendPQExpBuffer(&remote_command_str, appendPQExpBuffer(&remote_command_str,
"node service --action=stop --checkpoint"); "node service --action=stop --checkpoint");
} }
@@ -2432,10 +2433,8 @@ do_standby_switchover(void)
remote_command_str.data, remote_command_str.data,
&command_output); &command_output);
termPQExpBuffer(&remote_command_str); termPQExpBuffer(&remote_command_str);
/* /*
* --dry-run ends here with display of command which would be used to * --dry-run ends here with display of command which would be used to
* shut down the remote server * shut down the remote server
@@ -2531,14 +2530,19 @@ do_standby_switchover(void)
exit(ERR_SWITCHOVER_FAIL); exit(ERR_SWITCHOVER_FAIL);
} }
/* this is unlikely to happen, but check and handle gracefully anyway */
local_conn = establish_db_connection(config_file_options.conninfo, false);
if (PQstatus(local_conn) != CONNECTION_OK) if (PQstatus(local_conn) != CONNECTION_OK)
{ {
log_error(_("unable to reestablish connection to local node \"%s\""), log_warning(_("connection to local node lost, reconnecting.."));
local_node_record.node_name); local_conn = establish_db_connection(config_file_options.conninfo, false);
exit(ERR_DB_CONN);
if (PQstatus(local_conn) != CONNECTION_OK)
{
log_error(_("unable to reconnect to local node \"%s\""),
local_node_record.node_name);
exit(ERR_DB_CONN);
}
log_verbose(LOG_INFO, _("successfully reconnected to local node"));
} }
get_replication_info(local_conn, &replication_info); get_replication_info(local_conn, &replication_info);
@@ -2614,7 +2618,7 @@ do_standby_switchover(void)
log_debug("executing:\n \"%s\"", remote_command_str.data); log_debug("executing:\n \"%s\"", remote_command_str.data);
initPQExpBuffer(&command_output); initPQExpBuffer(&command_output);
r = remote_command( command_success = remote_command(
remote_host, remote_host,
runtime_options.remote_user, runtime_options.remote_user,
remote_command_str.data, remote_command_str.data,
@@ -2625,7 +2629,7 @@ do_standby_switchover(void)
/* TODO: verify this node's record was updated correctly */ /* TODO: verify this node's record was updated correctly */
if (r == false) if (command_success == false)
{ {
log_error(_("rejoin failed %i"), r); log_error(_("rejoin failed %i"), r);
log_detail("%s", command_output.data); log_detail("%s", command_output.data);
@@ -2749,7 +2753,6 @@ do_standby_switchover(void)
log_notice(_("STANDBY SWITCHOVER is complete")); log_notice(_("STANDBY SWITCHOVER is complete"));
return; return;
} }