From 24c6b2c9f1a251ac29086c71f41ee9f5c16548df Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Tue, 4 Jul 2017 23:14:05 +0900 Subject: [PATCH] repmgrd: initial code for cascaded standby failover --- dbutils.c | 36 +++++++++++ dbutils.h | 3 +- repmgrd.c | 181 +++++++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 203 insertions(+), 17 deletions(-) diff --git a/dbutils.c b/dbutils.c index 39903bb9..664c0ce6 100644 --- a/dbutils.c +++ b/dbutils.c @@ -1632,6 +1632,42 @@ update_node_record_set_primary(PGconn *conn, int this_node_id) return commit_transaction(conn); } +bool +update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id) +{ + PQExpBufferData query; + PGresult *res; + + log_debug(_("update_node_record_set_upstream(): Updating node %i's upstream node to %i"), + this_node_id, new_upstream_node_id); + + initPQExpBuffer(&query); + + appendPQExpBuffer(&query, + " UPDATE repmgr.nodes " + " SET upstream_node_id = %i " + " WHERE id = %i ", + new_upstream_node_id, + this_node_id); + + log_verbose(LOG_DEBUG, "update_node_record_set_upstream():\n%s\n", query.data); + + res = PQexec(conn, query.data); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + log_error(_("unable to set new upstream node id:\n %s"), + PQerrorMessage(conn)); + PQclear(res); + + return false; + } + + PQclear(res); + + return true; +} + /* * Update node record following change of status diff --git a/dbutils.h b/dbutils.h index 36f9e064..8f304e4c 100644 --- a/dbutils.h +++ b/dbutils.h @@ -160,7 +160,7 @@ PGconn *establish_db_connection_by_params(const char *keywords[], const char *values[], const bool exit_on_error); PGconn *establish_primary_db_connection(PGconn *conn, - const bool exit_on_error); + const bool exit_on_error); PGconn *get_primary_connection(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out); PGconn *get_primary_connection_quiet(PGconn *standby_conn, int *primary_id, char *primary_conninfo_out); @@ -221,6 +221,7 @@ bool update_node_record(PGconn *conn, char *repmgr_action, t_node_info *node_in bool delete_node_record(PGconn *conn, int node); bool update_node_record_set_primary(PGconn *conn, int this_node_id); +bool update_node_record_set_upstream(PGconn *conn, int this_node_id, int new_upstream_node_id); bool update_node_record_status(PGconn *conn, int this_node_id, char *type, int upstream_node_id, bool active); void clear_node_info_list(NodeInfoList *nodes); diff --git a/repmgrd.c b/repmgrd.c index 3a355104..d9575491 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -651,8 +651,15 @@ monitor_streaming_standby(void) // handle failure - do we want to loop here? upstream_conn = establish_db_connection(upstream_node_info.conninfo, false); - // fix for cascaded standbys - primary_conn = upstream_conn; + if (upstream_node_info.type == STANDBY) + { + // XXX check result + primary_conn = establish_primary_db_connection(local_conn, false); + } + else + { + primary_conn = upstream_conn; + } /* Log startup event */ if (startup_event_logged == false) @@ -980,12 +987,145 @@ do_primary_failover(void) return false; } - +/* + * do_upstream_standby_failover() + * + * Attach cascaded standby to primary + * + * Currently we will try to attach to the cluster primary, as "repmgr + * standby follow" doesn't support attaching to another node. + * + * If this becomes supported, it might be worth providing a selection + * of reconnection strategies as different behaviour might be desirable + * in different situations; + * or maybe the option not to reconnect might be required? + * + * XXX check this handles replication slots gracefully + */ static bool do_upstream_standby_failover(void) { - // not implemented yet - return false; + PQExpBufferData event_details; + t_node_info primary_node_info = T_NODE_INFO_INITIALIZER; + RecordStatus record_status; + int r; + + // check status + record_status = get_primary_node_record(local_conn, &primary_node_info); + + /* + * Verify that we can still talk to the cluster primary, even though + * the node's upstream is not available + */ + + // consolidate below code + if (is_server_available(primary_node_info.conninfo) == false) + { + log_warning(_("connection to primary %i lost"), primary_node_info.node_id); + + if (primary_conn != NULL) + { + PQfinish(primary_conn); + primary_conn = NULL; + } + } + + if (PQstatus(primary_conn) != CONNECTION_OK) + { + log_info(_("attempting to reconnect")); + primary_conn = establish_db_connection(primary_node_info.conninfo, false); + + if (PQstatus(primary_conn) != CONNECTION_OK) + { + log_warning(_("reconnection failed")); + } + else + { + log_info(_("reconnected")); + } + } + + /* grandparent upstream is inactive */ + if (primary_node_info.active == false) + { + // XXX + } + + /* Close the connection to this server */ + PQfinish(local_conn); + local_conn = NULL; + + initPQExpBuffer(&event_details); + + log_debug(_("standby follow command is:\n \"%s\""), + config_file_options.follow_command); + + r = system(config_file_options.follow_command); + + if (r != 0) + { + appendPQExpBuffer(&event_details, + _("unable to execute follow command:\n %s"), + config_file_options.follow_command); + + log_error("%s", event_details.data); + + /* It may not possible to write to the event notification + * table but we should be able to generate an external notification + * if required. + */ + create_event_record(primary_conn, + &config_file_options, + local_node_info.node_id, + "repmgrd_failover_follow", + false, + event_details.data); + + termPQExpBuffer(&event_details); + } + + if (update_node_record_set_upstream(primary_conn, + local_node_info.node_id, + primary_node_info.node_id) == false) + { + appendPQExpBuffer(&event_details, + _("unable to set node %i's new upstream ID to %i"), + local_node_info.node_id, + primary_node_info.node_id); + + log_error("%s", event_details.data); + + create_event_record(NULL, + &config_file_options, + local_node_info.node_id, + "repmgrd_failover_follow", + false, + event_details.data); + + termPQExpBuffer(&event_details); + + terminate(ERR_BAD_CONFIG); + } + + appendPQExpBuffer(&event_details, + _("node %i is now following primary node %i"), + local_node_info.node_id, + primary_node_info.node_id); + + log_notice("%s", event_details.data); + + create_event_record(primary_conn, + &config_file_options, + local_node_info.node_id, + "repmgrd_failover_follow", + true, + event_details.data); + + termPQExpBuffer(&event_details); + + local_conn = establish_db_connection(config_file_options.conninfo, true); + + return true; } @@ -1049,10 +1189,10 @@ promote_self(void) { int primary_node_id; - primary_conn = get_primary_connection(local_conn, + upstream_conn = get_primary_connection(local_conn, &primary_node_id, NULL); - if (PQstatus(primary_conn) == CONNECTION_OK && primary_node_id == failed_primary.node_id) + if (PQstatus(upstream_conn) == CONNECTION_OK && primary_node_id == failed_primary.node_id) { log_notice(_("original primary (id: %i) reappeared before this standby was promoted - no action taken"), failed_primary.node_id); @@ -1063,7 +1203,7 @@ promote_self(void) failed_primary.node_name, failed_primary.node_id); - create_event_record(primary_conn, + create_event_record(upstream_conn, &config_file_options, local_node_info.node_id, "repmgrd_failover_abort", @@ -1072,7 +1212,6 @@ promote_self(void) termPQExpBuffer(&event_details); - //PQfinish(primary_conn); //primary_conn = NULL; // XXX handle this! @@ -1254,11 +1393,11 @@ follow_new_primary(int new_primary_id) PQfinish(local_conn); local_conn = NULL; - primary_conn = establish_db_connection(new_primary.conninfo, false); + upstream_conn = establish_db_connection(new_primary.conninfo, false); - if (PQstatus(primary_conn) == CONNECTION_OK) + if (PQstatus(upstream_conn) == CONNECTION_OK) { - RecoveryType primary_recovery_type = get_recovery_type(primary_conn); + RecoveryType primary_recovery_type = get_recovery_type(upstream_conn); if (primary_recovery_type == RECTYPE_PRIMARY) { new_primary_ok = true; @@ -1266,7 +1405,7 @@ follow_new_primary(int new_primary_id) else { log_warning(_("new primary is not in recovery")); - PQfinish(primary_conn); + PQfinish(upstream_conn); } } @@ -1319,8 +1458,8 @@ follow_new_primary(int new_primary_id) // XXX check success - record_status = get_node_record(primary_conn, new_primary_id, &upstream_node_info); - record_status = get_node_record(primary_conn, local_node_info.node_id, &local_node_info); + record_status = get_node_record(upstream_conn, new_primary_id, &upstream_node_info); + record_status = get_node_record(upstream_conn, local_node_info.node_id, &local_node_info); local_conn = establish_db_connection(local_node_info.conninfo, false); initPQExpBuffer(&event_details); @@ -1331,7 +1470,7 @@ follow_new_primary(int new_primary_id) log_notice("%s\n", event_details.data); - create_event_record(primary_conn, + create_event_record(upstream_conn, &config_file_options, local_node_info.node_id, "repmgrd_failover_follow", @@ -1808,10 +1947,20 @@ close_connections() if (PQisBusy(primary_conn) == 1) cancel_query(primary_conn, config_file_options.primary_response_timeout); PQfinish(primary_conn); + primary_conn = NULL; + } + + if (upstream_conn != NULL && PQstatus(upstream_conn) == CONNECTION_OK) + { + PQfinish(upstream_conn); + upstream_conn = NULL; } if (PQstatus(local_conn) == CONNECTION_OK) + { PQfinish(local_conn); + local_conn = NULL; + } }