diff --git a/HISTORY b/HISTORY index 0be7c2aa..db7118c0 100644 --- a/HISTORY +++ b/HISTORY @@ -1,3 +1,7 @@ +4.0.3 2018-02- + repmgr: improve switchover handling when "pg_ctl" used to control the + server and logging output is not explicitly redirected (Ian) + 4.0.2 2018-01- repmgr: add missing -W option to getopt_long() invocation; GitHub #350 (Ian) repmgr: automatically create slot name if missing; GitHub #343 (Ian) diff --git a/errcode.h b/errcode.h index 98dc715e..4d137603 100644 --- a/errcode.h +++ b/errcode.h @@ -43,5 +43,6 @@ #define ERR_BARMAN 19 #define ERR_REGISTRATION_SYNC 20 #define ERR_OUT_OF_MEMORY 21 +#define ERR_REJOIN_FAIL 22 #endif /* _ERRCODE_H_ */ diff --git a/repmgr-action-node.c b/repmgr-action-node.c index 17236db9..882785b4 100644 --- a/repmgr-action-node.c +++ b/repmgr-action-node.c @@ -1590,6 +1590,7 @@ do_node_rejoin(void) bool success = true; int server_version_num = UNKNOWN_SERVER_VERSION_NUM; + int follow_error_code = SUCCESS; /* check node is not actually running */ @@ -1859,7 +1860,31 @@ do_node_rejoin(void) success = do_standby_follow_internal(upstream_conn, &primary_node_record, - &follow_output); + &follow_output, + &follow_error_code); + + if (success == false) + { + log_notice(_("NODE REJOIN failed")); + log_detail("%s", follow_output.data); + + create_event_notification(upstream_conn, + &config_file_options, + config_file_options.node_id, + "node_rejoin", + success, + follow_output.data); + + PQfinish(upstream_conn); + + termPQExpBuffer(&follow_output); + exit(follow_error_code); + } + + /* + * XXX add checks that node actually started and connected to primary, + * if not exit with ERR_REJOIN_FAIL + */ create_event_notification(upstream_conn, &config_file_options, @@ -1870,19 +1895,12 @@ do_node_rejoin(void) PQfinish(upstream_conn); - if (success == false) - { - log_notice(_("NODE REJOIN failed")); - log_detail("%s", follow_output.data); - - termPQExpBuffer(&follow_output); - exit(ERR_DB_QUERY); - } - log_notice(_("NODE REJOIN successful")); log_detail("%s", follow_output.data); termPQExpBuffer(&follow_output); + + return; } diff --git a/repmgr-action-standby.c b/repmgr-action-standby.c index 11ac68ec..465e2510 100644 --- a/repmgr-action-standby.c +++ b/repmgr-action-standby.c @@ -1590,6 +1590,7 @@ do_standby_follow(void) PQExpBufferData follow_output; bool success = false; + int follow_error_code = SUCCESS; uint64 local_system_identifier = UNKNOWN_SYSTEM_IDENTIFIER; t_conninfo_param_list repl_conninfo; @@ -1703,7 +1704,8 @@ do_standby_follow(void) success = do_standby_follow_internal(primary_conn, &primary_node_record, - &follow_output); + &follow_output, + &follow_error_code); create_event_notification(primary_conn, &config_file_options, @@ -1720,7 +1722,7 @@ do_standby_follow(void) log_detail("%s", follow_output.data); termPQExpBuffer(&follow_output); - exit(ERR_DB_QUERY); + exit(follow_error_code); } log_notice(_("STANDBY FOLLOW successful")); @@ -1740,7 +1742,7 @@ do_standby_follow(void) * this function. */ bool -do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output) +do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output, int *error_code) { t_node_info local_node_record = T_NODE_INFO_INITIALIZER; int original_upstream_node_id = UNKNOWN_NODE_ID; @@ -1763,8 +1765,9 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor { log_error(_("unable to retrieve record for node %i"), config_file_options.node_id); - PQfinish(primary_conn); - exit(ERR_BAD_CONFIG); + + *error_code = ERR_BAD_CONFIG; + return false; } /* @@ -1893,8 +1896,9 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor if (!create_recovery_file(&local_node_record, &recovery_conninfo, config_file_options.data_directory)) { - PQfinish(primary_conn); - exit(ERR_BAD_CONFIG); + /* XXX ERR_RECOVERY_FILE ??? */ + *error_code = ERR_BAD_CONFIG; + return false; } /* @@ -1931,8 +1935,9 @@ do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_recor if (success == false) { log_error(_("unable to %s server"), action); - PQfinish(primary_conn); - exit(ERR_NO_RESTART); + + *error_code = ERR_NO_RESTART; + return false; } } @@ -2059,6 +2064,7 @@ do_standby_switchover(void) config_file_options.node_id); PQfinish(local_conn); + exit(ERR_DB_QUERY); } diff --git a/repmgr-action-standby.h b/repmgr-action-standby.h index a15c2091..6431bd42 100644 --- a/repmgr-action-standby.h +++ b/repmgr-action-standby.h @@ -28,7 +28,7 @@ extern void do_standby_switchover(void); extern void do_standby_help(void); -extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output); +extern bool do_standby_follow_internal(PGconn *primary_conn, t_node_info *primary_node_record, PQExpBufferData *output, int *error_code);