Create checkpoint after pg_ctl promote (#378)

Creates a Postgres checkpoint after `pg_ctl promote` runs on the former standby and before `pg_rewind` runs on the former master. This fixes the race condition that was reported in https://github.com/2ndQuadrant/repmgr/issues/372
This commit is contained in:
Chris Fraser
2018-02-08 19:14:26 -08:00
committed by Ian Barwick
parent 5411225b6f
commit dd9df04334
3 changed files with 26 additions and 1 deletions

View File

@@ -1721,6 +1721,25 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
return success;
}
/*
 * create_checkpoint()
 *
 * Issue a CHECKPOINT command on the supplied connection.
 *
 * Returns true once the CHECKPOINT command has completed successfully.
 * If the command fails, the error is logged, the connection is closed and
 * the process exits with ERR_DB_QUERY, so this function never returns false.
 */
bool
create_checkpoint(PGconn *conn)
{
	char		sqlquery[MAXLEN];
	PGresult   *res;

	sqlquery_snprintf(sqlquery, "CHECKPOINT");

	log_verbose(LOG_DEBUG, "checkpoint:\n%s\n", sqlquery);

	res = PQexec(conn, sqlquery);

	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
	{
		log_err(_("Unable to create CHECKPOINT:\n%s\n"),
				PQerrorMessage(conn));

		/* PQexec can return NULL (e.g. out of memory); only clear a real result */
		if (res != NULL)
			PQclear(res);

		PQfinish(conn);
		exit(ERR_DB_QUERY);
	}

	/* Release the result object; previously it was leaked on every call */
	PQclear(res);

	log_notice(_("CHECKPOINT created\n"));

	/* The function is declared bool, so it must return a value */
	return true;
}
bool
update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)

View File

@@ -135,9 +135,9 @@ bool update_node_record(PGconn *conn, char *action, int node, char *type,
bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
/*
 * Executes CHECKPOINT on conn. On failure the implementation logs the
 * error, closes conn with PQfinish() and exits the process with
 * ERR_DB_QUERY rather than returning to the caller.
 */
bool create_checkpoint(PGconn *conn);
int get_node_replication_state(PGconn *conn, char *node_name, char *output);
t_server_type parse_node_type(const char *type);
int get_data_checksum_version(const char *data_directory);
#endif

View File

@@ -4515,6 +4515,12 @@ do_standby_promote(void)
log_notice(_("STANDBY PROMOTE successful\n"));
/*
* Force a checkpoint so that pg_rewind on former master can tell that the
* servers have diverged.
*/
create_checkpoint(conn);
/* Log the event */
create_event_record(conn,
&options,