From dd9df04334b7076bea0f346733b9a3c5ac671e7a Mon Sep 17 00:00:00 2001
From: Chris Fraser
Date: Thu, 8 Feb 2018 19:14:26 -0800
Subject: [PATCH] Create checkpoint after pg_ctl promote (#378)

Creates a Postgres checkpoint after `pg_ctl promote` runs on the former
standby and before `pg_rewind` runs on the former master. This fixes the
race condition that was reported in
https://github.com/2ndQuadrant/repmgr/issues/372
---
 dbutils.c | 37 +++++++++++++++++++++++++++++++++++++
 dbutils.h |  2 +-
 repmgr.c  |  6 ++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/dbutils.c b/dbutils.c
index 172e9818..09b91b2b 100644
--- a/dbutils.c
+++ b/dbutils.c
@@ -1721,6 +1721,43 @@ create_event_record(PGconn *conn, t_configuration_options *options, int node_id,
 
 	return success;
 }
+
+/*
+ * create_checkpoint()
+ *
+ * Issue a CHECKPOINT on the server that `conn` is connected to.
+ *
+ * If the command fails, the error is logged, the connection is closed and
+ * the process exits with ERR_DB_QUERY; the function therefore only ever
+ * returns to its caller on success, with `true`.
+ */
+bool
+create_checkpoint(PGconn *conn)
+{
+	char		sqlquery[MAXLEN];
+	PGresult   *res;
+
+	sqlquery_snprintf(sqlquery, "CHECKPOINT");
+	log_verbose(LOG_DEBUG, "checkpoint:\n%s\n", sqlquery);
+
+	res = PQexec(conn, sqlquery);
+	if (!res || PQresultStatus(res) != PGRES_COMMAND_OK)
+	{
+		log_err(_("Unable to create CHECKPOINT:\n%s\n"),
+				PQerrorMessage(conn));
+		/* PQclear() is a no-op for a NULL result */
+		PQclear(res);
+		PQfinish(conn);
+		exit(ERR_DB_QUERY);
+	}
+
+	/* Release the result so it is not leaked on the success path */
+	PQclear(res);
+
+	log_notice(_("CHECKPOINT created\n"));
+
+	return true;
+}
 
 bool
 update_node_record(PGconn *conn, char *action, int node, char *type, int upstream_node, char *cluster_name, char *node_name, char *conninfo, int priority, char *slot_name, bool active)
diff --git a/dbutils.h b/dbutils.h
index 9460cad6..b18f8795 100644
--- a/dbutils.h
+++ b/dbutils.h
@@ -135,9 +135,9 @@ bool update_node_record(PGconn *conn, char *action, int node, char *type,
 bool update_node_record_status(PGconn *conn, char *cluster_name, int this_node_id, char *type, int upstream_node_id, bool active);
 bool update_node_record_set_upstream(PGconn *conn, char *cluster_name, int this_node_id, int new_upstream_node_id);
 bool create_event_record(PGconn *conn, t_configuration_options *options, int node_id, char *event, bool successful, char *details);
+bool create_checkpoint(PGconn *conn);
 int get_node_replication_state(PGconn *conn, char *node_name, char *output);
 t_server_type parse_node_type(const char *type);
 int get_data_checksum_version(const char *data_directory);
 
 #endif
-
diff --git a/repmgr.c b/repmgr.c
index c6655dda..dfd8eab9 100644
--- a/repmgr.c
+++ b/repmgr.c
@@ -4515,6 +4515,12 @@ do_standby_promote(void)
 	log_notice(_("STANDBY PROMOTE successful\n"));
 
+	/*
+	 * Force a checkpoint so that pg_rewind on former master can tell that the
+	 * servers have diverged.
+	 */
+	create_checkpoint(conn);
+
 	/* Log the event */
 	create_event_record(conn,
 						&options,