mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-25 16:16:29 +00:00
repmgrd: notify sibling nodes to follow new primary after pg_ctl timeout
If "pg_ctl promote" fails due to a timeout, but the promotion itself succeeds,
have repmgrd on the new primary explicitly notify any sibling nodes to
follow it.
Previously the sibling nodes would wait "primary_notification_timeout" seconds
before attempting to discover the new primary.
This commit and the preceding commit eac80ae together address GitHub issue #425.
This commit is contained in:
@@ -889,12 +889,11 @@ monitor_streaming_standby(void)
|
|||||||
* It's possible the promote command timed out, but the promotion itself
|
* It's possible the promote command timed out, but the promotion itself
|
||||||
* succeeded. In this case failover state will be FAILOVER_STATE_PROMOTION_FAILED;
|
* succeeded. In this case failover state will be FAILOVER_STATE_PROMOTION_FAILED;
|
||||||
* we can update the node record ourselves and resume primary monitoring.
|
* we can update the node record ourselves and resume primary monitoring.
|
||||||
*
|
|
||||||
* XXX check if other standbys follow
|
|
||||||
*/
|
*/
|
||||||
if (failover_state == FAILOVER_STATE_PROMOTION_FAILED)
|
if (failover_state == FAILOVER_STATE_PROMOTION_FAILED)
|
||||||
{
|
{
|
||||||
int degraded_monitoring_elapsed;
|
int degraded_monitoring_elapsed;
|
||||||
|
int former_upstream_node_id = local_node_info.upstream_node_id;
|
||||||
|
|
||||||
update_node_record_set_primary(local_conn, local_node_info.node_id);
|
update_node_record_set_primary(local_conn, local_node_info.node_id);
|
||||||
record_status = get_node_record(local_conn, local_node_info.node_id, &local_node_info);
|
record_status = get_node_record(local_conn, local_node_info.node_id, &local_node_info);
|
||||||
@@ -904,6 +903,25 @@ monitor_streaming_standby(void)
|
|||||||
log_notice(_("resuming monitoring as primary node after %i seconds"),
|
log_notice(_("resuming monitoring as primary node after %i seconds"),
|
||||||
degraded_monitoring_elapsed);
|
degraded_monitoring_elapsed);
|
||||||
|
|
||||||
|
initPQExpBuffer(&event_details);
|
||||||
|
appendPQExpBuffer(&event_details,
|
||||||
|
"promotion command failed but promotion completed successfully");
|
||||||
|
create_event_notification(local_conn,
|
||||||
|
&config_file_options,
|
||||||
|
local_node_info.node_id,
|
||||||
|
"repmgrd_failover_promote",
|
||||||
|
true,
|
||||||
|
event_details.data);
|
||||||
|
|
||||||
|
termPQExpBuffer(&event_details);
|
||||||
|
|
||||||
|
/* notify former siblings that they should now follow this node */
|
||||||
|
get_active_sibling_node_records(local_conn,
|
||||||
|
local_node_info.node_id,
|
||||||
|
former_upstream_node_id,
|
||||||
|
&standby_nodes);
|
||||||
|
notify_followers(&standby_nodes, local_node_info.node_id);
|
||||||
|
|
||||||
/* this will restart monitoring in primary mode */
|
/* this will restart monitoring in primary mode */
|
||||||
monitoring_state = MS_NORMAL;
|
monitoring_state = MS_NORMAL;
|
||||||
return;
|
return;
|
||||||
|
|||||||
Reference in New Issue
Block a user