mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 14:46:29 +00:00
Since b5934bfd6071 in postgresql.git the flag `-Wshadow=compatible-local` is activated. This commit fixes any duplicated declaration made in the same function. References: HL-40
3720 lines
91 KiB
C
3720 lines
91 KiB
C
/*
|
|
* repmgr-action-node.c
|
|
*
|
|
* Implements actions available for any kind of node
|
|
*
|
|
* Copyright (c) EnterpriseDB Corporation, 2010-2021
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <sys/stat.h>
|
|
#include <dirent.h>
|
|
|
|
#include "repmgr.h"
|
|
#include "controldata.h"
|
|
#include "dirutil.h"
|
|
#include "dbutils.h"
|
|
#include "compat.h"
|
|
|
|
#include "repmgr-client-global.h"
|
|
#include "repmgr-action-node.h"
|
|
#include "repmgr-action-standby.h"
|
|
|
|
static bool copy_file(const char *src_file, const char *dest_file);
|
|
static void format_archive_dir(PQExpBufferData *archive_dir);
|
|
static t_server_action parse_server_action(const char *action);
|
|
static const char *output_repmgrd_status(CheckStatus status);
|
|
|
|
static void exit_optformat_error(const char *error, int errcode);
|
|
|
|
static void _do_node_service_list_actions(t_server_action action);
|
|
static void _do_node_status_is_shutdown_cleanly(void);
|
|
static void _do_node_archive_config(void);
|
|
static void _do_node_restore_config(void);
|
|
|
|
static void do_node_check_replication_connection(void);
|
|
static CheckStatus do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
|
|
static CheckStatus do_node_check_db_connection(PGconn *conn, OutputMode mode);
|
|
|
|
|
|
/*
|
|
* NODE STATUS
|
|
*
|
|
* Can only be run on the local node, as it needs to be able to
|
|
* read the data directory.
|
|
*
|
|
* Parameters:
|
|
* --is-shutdown-cleanly (for internal use only)
|
|
* --csv
|
|
*/
|
|
|
|
void
|
|
do_node_status(void)
|
|
{
|
|
PGconn *conn = NULL;
|
|
|
|
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
|
char cluster_size[MAXLEN];
|
|
PQExpBufferData output;
|
|
|
|
KeyValueList node_status = {NULL, NULL};
|
|
KeyValueListCell *cell = NULL;
|
|
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
|
|
|
ItemList warnings = {NULL, NULL};
|
|
RecoveryType recovery_type = RECTYPE_UNKNOWN;
|
|
ReplInfo replication_info;
|
|
t_recovery_conf recovery_conf = T_RECOVERY_CONF_INITIALIZER;
|
|
|
|
char data_dir[MAXPGPATH] = "";
|
|
char server_version_str[MAXVERSIONSTR] = "";
|
|
|
|
/*
|
|
* A database connection is *not* required for this check
|
|
*/
|
|
if (runtime_options.is_shutdown_cleanly == true)
|
|
{
|
|
return _do_node_status_is_shutdown_cleanly();
|
|
}
|
|
|
|
init_replication_info(&replication_info);
|
|
|
|
|
|
/* config file required, so we should have "conninfo" and "data_directory" */
|
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
strncpy(data_dir, config_file_options.data_directory, MAXPGPATH);
|
|
|
|
(void)get_server_version(conn, server_version_str);
|
|
|
|
/* check node exists */
|
|
|
|
if (get_node_record_with_upstream(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
|
{
|
|
log_error(_("no record found for node %i"), config_file_options.node_id);
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (get_cluster_size(conn, cluster_size) == false)
|
|
strncpy(cluster_size, _("unknown"), MAXLEN);
|
|
|
|
recovery_type = get_recovery_type(conn);
|
|
|
|
get_node_replication_stats(conn, &node_info);
|
|
|
|
key_value_list_set(&node_status,
|
|
"PostgreSQL version",
|
|
server_version_str);
|
|
|
|
key_value_list_set(&node_status,
|
|
"Total data size",
|
|
cluster_size);
|
|
|
|
key_value_list_set(&node_status,
|
|
"Conninfo",
|
|
node_info.conninfo);
|
|
|
|
if (runtime_options.verbose == true)
|
|
{
|
|
uint64 local_system_identifier = get_system_identifier(config_file_options.data_directory);
|
|
|
|
if (local_system_identifier == UNKNOWN_SYSTEM_IDENTIFIER)
|
|
{
|
|
key_value_list_set(&node_status,
|
|
"System identifier",
|
|
"unknown");
|
|
item_list_append_format(&warnings,
|
|
_("unable to retrieve system identifier from pg_control"));
|
|
}
|
|
else
|
|
{
|
|
key_value_list_set_format(&node_status,
|
|
"System identifier",
|
|
"%lu", local_system_identifier);
|
|
}
|
|
}
|
|
|
|
key_value_list_set(&node_status,
|
|
"Role",
|
|
get_node_type_string(node_info.type));
|
|
|
|
switch (node_info.type)
|
|
{
|
|
case PRIMARY:
|
|
if (recovery_type == RECTYPE_STANDBY)
|
|
{
|
|
item_list_append(&warnings,
|
|
_("- node is registered as primary but running as standby"));
|
|
}
|
|
break;
|
|
case STANDBY:
|
|
if (recovery_type == RECTYPE_PRIMARY)
|
|
{
|
|
item_list_append(&warnings,
|
|
_("- node is registered as standby but running as primary"));
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (guc_set(conn, "archive_mode", "=", "off"))
|
|
{
|
|
key_value_list_set(&node_status,
|
|
"WAL archiving",
|
|
"off");
|
|
|
|
key_value_list_set(&node_status,
|
|
"Archive command",
|
|
"(none)");
|
|
}
|
|
else
|
|
{
|
|
/* "archive_mode" is not "off", i.e. one of "on", "always" */
|
|
bool enabled = true;
|
|
PQExpBufferData archiving_status;
|
|
char archive_command[MAXLEN] = "";
|
|
|
|
initPQExpBuffer(&archiving_status);
|
|
|
|
/*
|
|
* if the node is a standby, and "archive_mode" is "on", archiving will
|
|
* actually be disabled.
|
|
*/
|
|
if (recovery_type == RECTYPE_STANDBY)
|
|
{
|
|
if (guc_set(conn, "archive_mode", "=", "on"))
|
|
enabled = false;
|
|
}
|
|
|
|
if (enabled == true)
|
|
{
|
|
appendPQExpBufferStr(&archiving_status, "enabled");
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&archiving_status, "disabled");
|
|
}
|
|
|
|
if (enabled == false && recovery_type == RECTYPE_STANDBY)
|
|
{
|
|
if (PQserverVersion(conn) >= 90500)
|
|
{
|
|
appendPQExpBufferStr(&archiving_status,
|
|
" (on standbys \"archive_mode\" must be set to \"always\" to be effective)");
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&archiving_status,
|
|
" (\"archive_mode\" has no effect on standbys)");
|
|
}
|
|
}
|
|
|
|
key_value_list_set(&node_status,
|
|
"WAL archiving",
|
|
archiving_status.data);
|
|
|
|
termPQExpBuffer(&archiving_status);
|
|
|
|
get_pg_setting(conn, "archive_command", archive_command);
|
|
|
|
key_value_list_set(&node_status,
|
|
"Archive command",
|
|
archive_command);
|
|
}
|
|
|
|
{
|
|
int ready_files;
|
|
|
|
ready_files = get_ready_archive_files(conn, data_dir);
|
|
|
|
if (ready_files == ARCHIVE_STATUS_DIR_ERROR)
|
|
{
|
|
item_list_append_format(&warnings,
|
|
"- unable to check archive_status directory\n");
|
|
}
|
|
else
|
|
{
|
|
if (runtime_options.output_mode == OM_CSV)
|
|
{
|
|
key_value_list_set_format(&node_status,
|
|
"WALs pending archiving",
|
|
"%i",
|
|
ready_files);
|
|
}
|
|
else
|
|
{
|
|
key_value_list_set_format(&node_status,
|
|
"WALs pending archiving",
|
|
"%i pending files",
|
|
ready_files);
|
|
}
|
|
}
|
|
|
|
if (guc_set(conn, "archive_mode", "=", "off"))
|
|
{
|
|
key_value_list_set_output_mode(&node_status, "WALs pending archiving", OM_CSV);
|
|
}
|
|
|
|
}
|
|
|
|
|
|
if (node_info.max_wal_senders >= 0)
|
|
{
|
|
/* In CSV mode, raw values supplied as well */
|
|
key_value_list_set_format(&node_status,
|
|
"Replication connections",
|
|
"%i (of maximal %i)",
|
|
node_info.attached_wal_receivers,
|
|
node_info.max_wal_senders);
|
|
}
|
|
else if (node_info.max_wal_senders == 0)
|
|
{
|
|
key_value_list_set_format(&node_status,
|
|
"Replication connections",
|
|
"disabled");
|
|
}
|
|
|
|
/* check for attached nodes */
|
|
{
|
|
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
|
NodeInfoListCell *node_cell = NULL;
|
|
ItemList missing_nodes = {NULL, NULL};
|
|
int missing_nodes_count = 0;
|
|
int expected_nodes_count = 0;
|
|
|
|
get_downstream_node_records(conn, config_file_options.node_id, &downstream_nodes);
|
|
|
|
/* if a witness node is present, we'll need to remove this from the total */
|
|
expected_nodes_count = downstream_nodes.node_count;
|
|
|
|
for (node_cell = downstream_nodes.head; node_cell; node_cell = node_cell->next)
|
|
{
|
|
/* skip witness server */
|
|
if (node_cell->node_info->type == WITNESS)
|
|
{
|
|
expected_nodes_count --;
|
|
continue;
|
|
}
|
|
|
|
if (is_downstream_node_attached(conn, node_cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
|
{
|
|
missing_nodes_count++;
|
|
item_list_append_format(&missing_nodes,
|
|
"%s (ID: %i)",
|
|
node_cell->node_info->node_name,
|
|
node_cell->node_info->node_id);
|
|
}
|
|
}
|
|
|
|
if (missing_nodes_count)
|
|
{
|
|
ItemListCell *missing_cell = NULL;
|
|
|
|
item_list_append_format(&warnings,
|
|
_("- %i of %i downstream nodes not attached:"),
|
|
missing_nodes_count,
|
|
expected_nodes_count);
|
|
|
|
for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
|
|
{
|
|
item_list_append_format(&warnings,
|
|
" - %s\n", missing_cell->string);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (node_info.max_replication_slots == 0)
|
|
{
|
|
key_value_list_set(&node_status,
|
|
"Replication slots",
|
|
"disabled");
|
|
}
|
|
else
|
|
{
|
|
PQExpBufferData slotinfo;
|
|
|
|
/*
|
|
* check for missing replication slots - we do this regardless of
|
|
* what "max_replication_slots" is set to, in case the downstream
|
|
* node was configured with "use_replication_slots=true" and is
|
|
* expecting a replication slot to be available
|
|
*/
|
|
get_downstream_nodes_with_missing_slot(conn,
|
|
config_file_options.node_id,
|
|
&missing_slots);
|
|
|
|
if (missing_slots.node_count > 0)
|
|
{
|
|
NodeInfoListCell *missing_slot_cell = NULL;
|
|
|
|
item_list_append_format(&warnings,
|
|
_("- replication slots missing for following %i node(s):"),
|
|
missing_slots.node_count);
|
|
|
|
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
|
{
|
|
item_list_append_format(&warnings,
|
|
_(" - %s (ID: %i, slot name: \"%s\")"),
|
|
missing_slot_cell->node_info->node_name,
|
|
missing_slot_cell->node_info->node_id,
|
|
missing_slot_cell->node_info->slot_name);
|
|
}
|
|
}
|
|
|
|
initPQExpBuffer(&slotinfo);
|
|
|
|
appendPQExpBuffer(&slotinfo,
|
|
"%i physical (of maximal %i; %i missing)",
|
|
node_info.active_replication_slots + node_info.inactive_replication_slots,
|
|
node_info.max_replication_slots,
|
|
missing_slots.node_count);
|
|
|
|
if (node_info.inactive_replication_slots > 0)
|
|
{
|
|
KeyValueList inactive_replication_slots = {NULL, NULL};
|
|
|
|
(void) get_inactive_replication_slots(conn, &inactive_replication_slots);
|
|
|
|
appendPQExpBuffer(&slotinfo,
|
|
"; %i inactive",
|
|
node_info.inactive_replication_slots);
|
|
|
|
item_list_append_format(&warnings,
|
|
_("- node has %i inactive physical replication slots"),
|
|
node_info.inactive_replication_slots);
|
|
|
|
for (cell = inactive_replication_slots.head; cell; cell = cell->next)
|
|
{
|
|
item_list_append_format(&warnings,
|
|
" - %s", cell->key);
|
|
}
|
|
|
|
key_value_list_free(&inactive_replication_slots);
|
|
}
|
|
|
|
key_value_list_set(&node_status,
|
|
"Replication slots",
|
|
slotinfo.data);
|
|
|
|
termPQExpBuffer(&slotinfo);
|
|
}
|
|
|
|
|
|
if (node_info.type == STANDBY)
|
|
{
|
|
key_value_list_set_format(&node_status,
|
|
"Upstream node",
|
|
"%s (ID: %i)",
|
|
node_info.upstream_node_name,
|
|
node_info.upstream_node_id);
|
|
|
|
get_replication_info(conn, node_info.type, &replication_info);
|
|
|
|
key_value_list_set_format(&node_status,
|
|
"Replication lag",
|
|
"%i seconds",
|
|
replication_info.replication_lag_time);
|
|
|
|
key_value_list_set_format(&node_status,
|
|
"Last received LSN",
|
|
"%X/%X", format_lsn(replication_info.last_wal_receive_lsn));
|
|
|
|
key_value_list_set_format(&node_status,
|
|
"Last replayed LSN",
|
|
"%X/%X", format_lsn(replication_info.last_wal_replay_lsn));
|
|
}
|
|
else
|
|
{
|
|
key_value_list_set(&node_status,
|
|
"Upstream node",
|
|
"(none)");
|
|
key_value_list_set_output_mode(&node_status,
|
|
"Upstream node",
|
|
OM_CSV);
|
|
|
|
key_value_list_set(&node_status,
|
|
"Replication lag",
|
|
"n/a");
|
|
|
|
key_value_list_set(&node_status,
|
|
"Last received LSN",
|
|
"(none)");
|
|
|
|
key_value_list_set_output_mode(&node_status,
|
|
"Last received LSN",
|
|
OM_CSV);
|
|
|
|
key_value_list_set(&node_status,
|
|
"Last replayed LSN",
|
|
"(none)");
|
|
|
|
key_value_list_set_output_mode(&node_status,
|
|
"Last replayed LSN",
|
|
OM_CSV);
|
|
}
|
|
|
|
|
|
parse_recovery_conf(data_dir, &recovery_conf);
|
|
|
|
/* format output */
|
|
initPQExpBuffer(&output);
|
|
|
|
if (runtime_options.output_mode == OM_CSV)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"\"Node name\",\"%s\"\n",
|
|
node_info.node_name);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"\"Node ID\",\"%i\"\n",
|
|
node_info.node_id);
|
|
|
|
for (cell = node_status.head; cell; cell = cell->next)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"\"%s\",\"%s\"\n",
|
|
cell->key, cell->value);
|
|
}
|
|
|
|
/* we'll add the raw data as well */
|
|
appendPQExpBuffer(&output,
|
|
"\"max_wal_senders\",%i\n",
|
|
node_info.max_wal_senders);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"\"occupied_wal_senders\",%i\n",
|
|
node_info.attached_wal_receivers);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"\"max_replication_slots\",%i\n",
|
|
node_info.max_replication_slots);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"\"active_replication_slots\",%i\n",
|
|
node_info.active_replication_slots);
|
|
|
|
/* output inactive slot information */
|
|
appendPQExpBuffer(&output,
|
|
"\"inactive_replication_slots\",%i",
|
|
node_info.inactive_replication_slots);
|
|
|
|
if (node_info.inactive_replication_slots)
|
|
{
|
|
KeyValueList inactive_replication_slots = {NULL, NULL};
|
|
(void) get_inactive_replication_slots(conn, &inactive_replication_slots);
|
|
for (cell = inactive_replication_slots.head; cell; cell = cell->next)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
",\"%s\"", cell->key);
|
|
}
|
|
|
|
key_value_list_free(&inactive_replication_slots);
|
|
}
|
|
|
|
/* output missing slot information */
|
|
|
|
appendPQExpBufferChar(&output, '\n');
|
|
appendPQExpBuffer(&output,
|
|
"\"missing_replication_slots\",%i",
|
|
missing_slots.node_count);
|
|
|
|
if (missing_slots.node_count > 0)
|
|
{
|
|
NodeInfoListCell *missing_slot_cell = NULL;
|
|
|
|
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
",\"%s\"", missing_slot_cell->node_info->slot_name);
|
|
}
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"Node \"%s\":\n",
|
|
node_info.node_name);
|
|
|
|
for (cell = node_status.head; cell; cell = cell->next)
|
|
{
|
|
if (cell->output_mode == OM_NOT_SET)
|
|
appendPQExpBuffer(&output,
|
|
"\t%s: %s\n",
|
|
cell->key, cell->value);
|
|
}
|
|
}
|
|
|
|
puts(output.data);
|
|
|
|
termPQExpBuffer(&output);
|
|
|
|
if (warnings.head != NULL && runtime_options.terse == false && runtime_options.output_mode == OM_TEXT)
|
|
{
|
|
log_warning(_("following issue(s) were detected:"));
|
|
print_item_list(&warnings);
|
|
log_hint(_("execute \"repmgr node check\" for more details"));
|
|
}
|
|
|
|
clear_node_info_list(&missing_slots);
|
|
key_value_list_free(&node_status);
|
|
item_list_free(&warnings);
|
|
PQfinish(conn);
|
|
|
|
/*
|
|
* If warnings were noted, even if they're not displayed (e.g. in --csv node),
|
|
* that means something's not right so we need to emit a non-zero exit code.
|
|
*/
|
|
if (warnings.head != NULL)
|
|
{
|
|
exit(ERR_NODE_STATUS);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Returns information about the running state of the node.
|
|
* For internal use during "standby switchover".
|
|
*
|
|
* Returns "longopt" output:
|
|
*
|
|
* --status=(RUNNING|SHUTDOWN|UNCLEAN_SHUTDOWN|UNKNOWN)
|
|
* --last-checkpoint=...
|
|
*/
|
|
|
|
static void
|
|
_do_node_status_is_shutdown_cleanly(void)
|
|
{
|
|
PGPing ping_status;
|
|
PQExpBufferData output;
|
|
|
|
DBState db_state;
|
|
XLogRecPtr checkPoint = InvalidXLogRecPtr;
|
|
|
|
NodeStatus node_status = NODE_STATUS_UNKNOWN;
|
|
|
|
initPQExpBuffer(&output);
|
|
|
|
appendPQExpBufferStr(&output,
|
|
"--state=");
|
|
|
|
/* sanity-check we're dealing with a PostgreSQL directory */
|
|
if (is_pg_dir(config_file_options.data_directory) == false)
|
|
{
|
|
appendPQExpBufferStr(&output, "UNKNOWN");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
ping_status = PQping(config_file_options.conninfo);
|
|
|
|
switch (ping_status)
|
|
{
|
|
case PQPING_OK:
|
|
node_status = NODE_STATUS_UP;
|
|
break;
|
|
case PQPING_REJECT:
|
|
node_status = NODE_STATUS_UP;
|
|
break;
|
|
case PQPING_NO_ATTEMPT:
|
|
case PQPING_NO_RESPONSE:
|
|
/* status not yet clear */
|
|
break;
|
|
}
|
|
|
|
/* check what pg_control says */
|
|
|
|
if (get_db_state(config_file_options.data_directory, &db_state) == false)
|
|
{
|
|
/*
|
|
* Unable to retrieve the database state from pg_control
|
|
*/
|
|
node_status = NODE_STATUS_UNKNOWN;
|
|
log_verbose(LOG_DEBUG, "unable to determine db state");
|
|
goto return_state;
|
|
}
|
|
|
|
log_verbose(LOG_DEBUG, "db state now: %s", describe_db_state(db_state));
|
|
|
|
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
|
{
|
|
if (node_status != NODE_STATUS_UP)
|
|
{
|
|
node_status = NODE_STATUS_UNCLEAN_SHUTDOWN;
|
|
}
|
|
/* server is still responding but shutting down */
|
|
else if (db_state == DB_SHUTDOWNING)
|
|
{
|
|
node_status = NODE_STATUS_SHUTTING_DOWN;
|
|
}
|
|
}
|
|
|
|
checkPoint = get_latest_checkpoint_location(config_file_options.data_directory);
|
|
|
|
if (checkPoint == InvalidXLogRecPtr)
|
|
{
|
|
/* unable to read pg_control, don't know what's happening */
|
|
node_status = NODE_STATUS_UNKNOWN;
|
|
}
|
|
else if (node_status == NODE_STATUS_UNKNOWN)
|
|
{
|
|
/*
|
|
* if still "UNKNOWN" at this point, then the node must be cleanly shut
|
|
* down
|
|
*/
|
|
node_status = NODE_STATUS_DOWN;
|
|
}
|
|
|
|
|
|
return_state:
|
|
|
|
log_verbose(LOG_DEBUG, "node status determined as: %s",
|
|
print_node_status(node_status));
|
|
|
|
appendPQExpBuffer(&output,
|
|
"%s", print_node_status(node_status));
|
|
|
|
if (node_status == NODE_STATUS_DOWN)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
" --last-checkpoint-lsn=%X/%X",
|
|
format_lsn(checkPoint));
|
|
}
|
|
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
static void
|
|
exit_optformat_error(const char *error, int errcode)
|
|
{
|
|
PQExpBufferData output;
|
|
|
|
Assert(runtime_options.output_mode == OM_OPTFORMAT);
|
|
|
|
initPQExpBuffer(&output);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"--error=%s",
|
|
error);
|
|
|
|
printf("%s\n", output.data);
|
|
|
|
termPQExpBuffer(&output);
|
|
|
|
exit(errcode);
|
|
}
|
|
|
|
/*
|
|
* Configuration file required
|
|
*/
|
|
void
|
|
do_node_check(void)
|
|
{
|
|
PGconn *conn = NULL;
|
|
PQExpBufferData output;
|
|
|
|
t_node_info node_info = T_NODE_INFO_INITIALIZER;
|
|
|
|
CheckStatus return_code;
|
|
CheckStatusList status_list = {NULL, NULL};
|
|
CheckStatusListCell *cell = NULL;
|
|
|
|
bool issue_detected = false;
|
|
bool exit_on_connection_error = true;
|
|
|
|
/* for internal use */
|
|
if (runtime_options.has_passfile == true)
|
|
{
|
|
return_code = has_passfile() ? 0 : 1;
|
|
|
|
exit(return_code);
|
|
}
|
|
|
|
/* for use by "standby switchover" */
|
|
if (runtime_options.replication_connection == true)
|
|
{
|
|
do_node_check_replication_connection();
|
|
exit(SUCCESS);
|
|
}
|
|
|
|
if (runtime_options.db_connection == true)
|
|
{
|
|
exit_on_connection_error = false;
|
|
}
|
|
|
|
/*
|
|
* If --optformat was provided, we'll assume this is a remote invocation
|
|
* and instead of exiting with an error, we'll return an error string to
|
|
* so the remote invoker will know what's happened.
|
|
*/
|
|
if (runtime_options.output_mode == OM_OPTFORMAT)
|
|
{
|
|
exit_on_connection_error = false;
|
|
}
|
|
|
|
|
|
if (config_file_options.conninfo[0] != '\0')
|
|
{
|
|
t_conninfo_param_list node_conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
char *errmsg = NULL;
|
|
bool parse_success = false;
|
|
|
|
initialize_conninfo_params(&node_conninfo, false);
|
|
|
|
parse_success = parse_conninfo_string(config_file_options.conninfo,
|
|
&node_conninfo,
|
|
&errmsg, false);
|
|
|
|
if (parse_success == false)
|
|
{
|
|
if (runtime_options.output_mode == OM_OPTFORMAT)
|
|
{
|
|
exit_optformat_error("CONNINFO_PARSE",
|
|
ERR_BAD_CONFIG);
|
|
}
|
|
|
|
log_error(_("unable to parse conninfo string \"%s\" for local node"),
|
|
config_file_options.conninfo);
|
|
log_detail("%s", errmsg);
|
|
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/*
|
|
* If --superuser option provided, attempt to connect as the specified user
|
|
*/
|
|
|
|
if (runtime_options.superuser[0] != '\0')
|
|
{
|
|
conn = establish_db_connection_with_replacement_param(
|
|
config_file_options.conninfo,
|
|
"user",
|
|
runtime_options.superuser,
|
|
exit_on_connection_error);
|
|
}
|
|
else
|
|
{
|
|
conn = establish_db_connection_by_params(&node_conninfo, exit_on_connection_error);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
conn = establish_db_connection_by_params(&source_conninfo, exit_on_connection_error);
|
|
}
|
|
|
|
|
|
/*
|
|
* --db-connection option provided
|
|
*/
|
|
if (runtime_options.db_connection == true)
|
|
{
|
|
return_code = do_node_check_db_connection(conn, runtime_options.output_mode);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
/*
|
|
* If we've reached here, and the connection is invalid, then --optformat was provided
|
|
*/
|
|
if (PQstatus(conn) != CONNECTION_OK)
|
|
{
|
|
exit_optformat_error("DB_CONNECTION",
|
|
ERR_DB_CONN);
|
|
}
|
|
|
|
if (get_node_record(conn, config_file_options.node_id, &node_info) != RECORD_FOUND)
|
|
{
|
|
log_error(_("no record found for node %i"), config_file_options.node_id);
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/* add replication statistics to node record */
|
|
get_node_replication_stats(conn, &node_info);
|
|
|
|
/*
|
|
* handle specific checks ======================
|
|
*/
|
|
if (runtime_options.archive_ready == true)
|
|
{
|
|
return_code = do_node_check_archive_ready(conn,
|
|
runtime_options.output_mode,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.upstream == true)
|
|
{
|
|
return_code = do_node_check_upstream(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.downstream == true)
|
|
{
|
|
return_code = do_node_check_downstream(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.replication_lag == true)
|
|
{
|
|
return_code = do_node_check_replication_lag(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.role == true)
|
|
{
|
|
return_code = do_node_check_role(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.slots == true)
|
|
{
|
|
return_code = do_node_check_slots(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.missing_slots == true)
|
|
{
|
|
return_code = do_node_check_missing_slots(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.data_directory_config == true)
|
|
{
|
|
return_code = do_node_check_data_directory(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.repmgrd == true)
|
|
{
|
|
return_code = do_node_check_repmgrd(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
if (runtime_options.replication_config_owner == true)
|
|
{
|
|
return_code = do_node_check_replication_config_owner(conn,
|
|
runtime_options.output_mode,
|
|
&node_info,
|
|
NULL);
|
|
PQfinish(conn);
|
|
exit(return_code);
|
|
}
|
|
|
|
|
|
if (runtime_options.output_mode == OM_NAGIOS)
|
|
{
|
|
log_error(_("--nagios can only be used with a specific check"));
|
|
log_hint(_("execute \"repmgr node --help\" for details"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/* output general overview */
|
|
|
|
initPQExpBuffer(&output);
|
|
|
|
/* order functions are called is also output order */
|
|
if (do_node_check_role(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_replication_lag(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_archive_ready(conn, runtime_options.output_mode, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_upstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_downstream(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (do_node_check_data_directory(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
|
|
issue_detected = true;
|
|
|
|
if (runtime_options.output_mode == OM_CSV)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"\"Node name\",\"%s\"\n",
|
|
node_info.node_name);
|
|
|
|
appendPQExpBuffer(&output,
|
|
"\"Node ID\",\"%i\"\n",
|
|
node_info.node_id);
|
|
|
|
for (cell = status_list.head; cell; cell = cell->next)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"\"%s\",\"%s\"",
|
|
cell->item,
|
|
output_check_status(cell->status));
|
|
|
|
if (strlen(cell->details))
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
",\"%s\"",
|
|
cell->details);
|
|
}
|
|
appendPQExpBufferChar(&output, '\n');
|
|
}
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"Node \"%s\":\n",
|
|
node_info.node_name);
|
|
|
|
for (cell = status_list.head; cell; cell = cell->next)
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
"\t%s: %s",
|
|
cell->item,
|
|
output_check_status(cell->status));
|
|
|
|
if (strlen(cell->details))
|
|
{
|
|
appendPQExpBuffer(&output,
|
|
" (%s)",
|
|
cell->details);
|
|
}
|
|
appendPQExpBufferChar(&output, '\n');
|
|
}
|
|
}
|
|
|
|
|
|
printf("%s", output.data);
|
|
termPQExpBuffer(&output);
|
|
check_status_list_free(&status_list);
|
|
|
|
PQfinish(conn);
|
|
|
|
if (issue_detected == true)
|
|
{
|
|
exit(ERR_NODE_STATUS);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
do_node_check_replication_connection(void)
|
|
{
|
|
PGconn *local_conn = NULL;
|
|
PGconn *repl_conn = NULL;
|
|
t_node_info node_record = T_NODE_INFO_INITIALIZER;
|
|
RecordStatus record_status = RECORD_NOT_FOUND;
|
|
PQExpBufferData output;
|
|
|
|
|
|
initPQExpBuffer(&output);
|
|
appendPQExpBufferStr(&output,
|
|
"--connection=");
|
|
|
|
if (runtime_options.remote_node_id == UNKNOWN_NODE_ID)
|
|
{
|
|
appendPQExpBufferStr(&output, "UNKNOWN");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
/* retrieve remote node record from local database */
|
|
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
|
|
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
{
|
|
appendPQExpBufferStr(&output, "CONNECTION_ERROR");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
record_status = get_node_record(local_conn, runtime_options.remote_node_id, &node_record);
|
|
PQfinish(local_conn);
|
|
|
|
if (record_status != RECORD_FOUND)
|
|
{
|
|
appendPQExpBufferStr(&output, "UNKNOWN");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
repl_conn = establish_replication_connection_from_conninfo(node_record.conninfo,
|
|
node_record.repluser);
|
|
|
|
if (PQstatus(repl_conn) != CONNECTION_OK)
|
|
{
|
|
appendPQExpBufferStr(&output, "BAD");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
return;
|
|
}
|
|
|
|
PQfinish(repl_conn);
|
|
|
|
appendPQExpBufferStr(&output, "OK");
|
|
printf("%s\n", output.data);
|
|
termPQExpBuffer(&output);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_archive_ready(PGconn *conn, OutputMode mode, CheckStatusList *list_output)
|
|
{
|
|
int ready_archive_files = 0;
|
|
CheckStatus status = CHECK_STATUS_UNKNOWN;
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --archive-ready option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
ready_archive_files = get_ready_archive_files(conn, config_file_options.data_directory);
|
|
|
|
if (ready_archive_files > config_file_options.archive_ready_critical)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--files=%i --threshold=%i",
|
|
ready_archive_files, config_file_options.archive_ready_critical);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files | files=%i;%i;%i",
|
|
ready_archive_files,
|
|
ready_archive_files,
|
|
config_file_options.archive_ready_warning,
|
|
config_file_options.archive_ready_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files, critical threshold: %i",
|
|
ready_archive_files, config_file_options.archive_ready_critical);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if (ready_archive_files > config_file_options.archive_ready_warning)
|
|
{
|
|
status = CHECK_STATUS_WARNING;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--files=%i --threshold=%i",
|
|
ready_archive_files, config_file_options.archive_ready_warning);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files | files=%i;%i;%i",
|
|
ready_archive_files,
|
|
ready_archive_files,
|
|
config_file_options.archive_ready_warning,
|
|
config_file_options.archive_ready_critical);
|
|
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files (threshold: %i)",
|
|
ready_archive_files, config_file_options.archive_ready_warning);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if (ready_archive_files < 0)
|
|
{
|
|
status = CHECK_STATUS_UNKNOWN;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
break;
|
|
case OM_NAGIOS:
|
|
case OM_TEXT:
|
|
appendPQExpBufferStr(&details,
|
|
"unable to check archive_status directory");
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
status = CHECK_STATUS_OK;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--files=%i", ready_archive_files);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files | files=%i;%i;%i",
|
|
ready_archive_files,
|
|
ready_archive_files,
|
|
config_file_options.archive_ready_warning,
|
|
config_file_options.archive_ready_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i pending archive ready files", ready_archive_files);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
{
|
|
printf("--status=%s %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
break;
|
|
case OM_NAGIOS:
|
|
printf("REPMGR_ARCHIVE_READY %s: %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"WAL archiving",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
return status;
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_downstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
NodeInfoList downstream_nodes = T_NODE_INFO_LIST_INITIALIZER;
|
|
NodeInfoListCell *cell = NULL;
|
|
int missing_nodes_count = 0;
|
|
int expected_nodes_count = 0;
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
ItemList missing_nodes = {NULL, NULL};
|
|
ItemList attached_nodes = {NULL, NULL};
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --downstream option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
get_downstream_node_records(conn, config_file_options.node_id, &downstream_nodes);
|
|
|
|
/* if a witness node is present, we'll need to remove this from the total */
|
|
expected_nodes_count = downstream_nodes.node_count;
|
|
|
|
for (cell = downstream_nodes.head; cell; cell = cell->next)
|
|
{
|
|
/* skip witness server */
|
|
if (cell->node_info->type == WITNESS)
|
|
{
|
|
expected_nodes_count --;
|
|
continue;
|
|
}
|
|
|
|
if (is_downstream_node_attached_quiet(conn, cell->node_info->node_name, NULL) != NODE_ATTACHED)
|
|
{
|
|
missing_nodes_count++;
|
|
item_list_append_format(&missing_nodes,
|
|
"%s (ID: %i)",
|
|
cell->node_info->node_name,
|
|
cell->node_info->node_id);
|
|
}
|
|
else
|
|
{
|
|
item_list_append_format(&attached_nodes,
|
|
"%s (ID: %i)",
|
|
cell->node_info->node_name,
|
|
cell->node_info->node_id);
|
|
}
|
|
}
|
|
|
|
if (node_info->type == WITNESS)
|
|
{
|
|
/* witness is not connecting to any upstream */
|
|
appendPQExpBufferStr(&details,
|
|
_("N/A - node is a witness"));
|
|
}
|
|
else if (missing_nodes_count == 0)
|
|
{
|
|
if (expected_nodes_count == 0)
|
|
appendPQExpBufferStr(&details,
|
|
"this node has no downstream nodes");
|
|
else
|
|
appendPQExpBuffer(&details,
|
|
"%i of %i downstream nodes attached",
|
|
expected_nodes_count - missing_nodes_count,
|
|
expected_nodes_count);
|
|
}
|
|
else
|
|
{
|
|
ItemListCell *missing_cell = NULL;
|
|
bool first = true;
|
|
|
|
status = CHECK_STATUS_CRITICAL;
|
|
|
|
appendPQExpBuffer(&details,
|
|
"%i of %i downstream nodes not attached",
|
|
missing_nodes_count,
|
|
expected_nodes_count);
|
|
|
|
if (mode != OM_NAGIOS)
|
|
{
|
|
appendPQExpBufferStr(&details, "; missing: ");
|
|
|
|
for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
|
|
{
|
|
if (first == false)
|
|
appendPQExpBufferStr(&details,
|
|
", ");
|
|
else
|
|
first = false;
|
|
|
|
if (first == false)
|
|
appendPQExpBufferStr(&details, missing_cell->string);
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_NAGIOS:
|
|
{
|
|
if (missing_nodes_count)
|
|
{
|
|
ItemListCell *missing_cell = NULL;
|
|
bool first = true;
|
|
|
|
appendPQExpBufferStr(&details, " (missing: ");
|
|
|
|
for (missing_cell = missing_nodes.head; missing_cell; missing_cell = missing_cell->next)
|
|
{
|
|
if (first == false)
|
|
appendPQExpBufferStr(&details, ", ");
|
|
else
|
|
first = false;
|
|
|
|
if (first == false)
|
|
appendPQExpBufferStr(&details, missing_cell->string);
|
|
}
|
|
|
|
appendPQExpBufferChar(&details, ')');
|
|
}
|
|
|
|
printf("REPMGR_DOWNSTREAM_SERVERS %s: %s | attached=%i, missing=%i\n",
|
|
output_check_status(status),
|
|
details.data,
|
|
expected_nodes_count - missing_nodes_count,
|
|
missing_nodes_count);
|
|
}
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Downstream servers",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
|
|
}
|
|
termPQExpBuffer(&details);
|
|
clear_node_info_list(&downstream_nodes);
|
|
return status;
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_upstream(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
PGconn *upstream_conn = NULL;
|
|
t_node_info upstream_node_info = T_NODE_INFO_INITIALIZER;
|
|
PQExpBufferData details;
|
|
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --upstream option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
if (node_info->type == WITNESS)
|
|
{
|
|
/* witness is not connecting to any upstream */
|
|
appendPQExpBufferStr(&details,
|
|
_("N/A - node is a witness"));
|
|
}
|
|
else if (get_node_record(conn, node_info->upstream_node_id, &upstream_node_info) != RECORD_FOUND)
|
|
{
|
|
if (get_recovery_type(conn) == RECTYPE_STANDBY)
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("node \"%s\" (ID: %i) is a standby but no upstream record found"),
|
|
node_info->node_name,
|
|
node_info->node_id);
|
|
status = CHECK_STATUS_CRITICAL;
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("N/A - node is primary"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
upstream_conn = establish_db_connection(upstream_node_info.conninfo, true);
|
|
|
|
/* check our node is connected */
|
|
if (is_downstream_node_attached(upstream_conn, config_file_options.node_name, NULL) != NODE_ATTACHED)
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("node \"%s\" (ID: %i) is not attached to expected upstream node \"%s\" (ID: %i)"),
|
|
node_info->node_name,
|
|
node_info->node_id,
|
|
upstream_node_info.node_name,
|
|
upstream_node_info.node_id);
|
|
status = CHECK_STATUS_CRITICAL;
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("node \"%s\" (ID: %i) is attached to expected upstream node \"%s\" (ID: %i)"),
|
|
node_info->node_name,
|
|
node_info->node_id,
|
|
upstream_node_info.node_name,
|
|
upstream_node_info.node_id);
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_NAGIOS:
|
|
{
|
|
printf("REPMGR_UPSTREAM_SERVER %s: %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
break;
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Upstream connection",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
int lag_seconds = 0;
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --replication-lag option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
if (node_info->recovery_type == RECTYPE_PRIMARY)
|
|
{
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBufferStr(&details,
|
|
"--lag=0");
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"0 seconds | lag=0;%i;%i",
|
|
config_file_options.replication_lag_warning,
|
|
config_file_options.replication_lag_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
if (node_info->type == WITNESS)
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
"N/A - node is witness");
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
"N/A - node is primary");
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
lag_seconds = get_replication_lag_seconds(conn);
|
|
|
|
log_debug("lag seconds: %i", lag_seconds);
|
|
|
|
if (lag_seconds >= config_file_options.replication_lag_critical)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--lag=%i --threshold=%i",
|
|
lag_seconds, config_file_options.replication_lag_critical);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds | lag=%i;%i;%i",
|
|
lag_seconds,
|
|
lag_seconds,
|
|
config_file_options.replication_lag_warning,
|
|
config_file_options.replication_lag_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds, critical threshold: %i)",
|
|
lag_seconds, config_file_options.replication_lag_critical);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if (lag_seconds > config_file_options.replication_lag_warning)
|
|
{
|
|
status = CHECK_STATUS_WARNING;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--lag=%i --threshold=%i",
|
|
lag_seconds, config_file_options.replication_lag_warning);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds | lag=%i;%i;%i",
|
|
lag_seconds,
|
|
lag_seconds,
|
|
config_file_options.replication_lag_warning,
|
|
config_file_options.replication_lag_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds, warning threshold: %i)",
|
|
lag_seconds, config_file_options.replication_lag_warning);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else if (lag_seconds == UNKNOWN_REPLICATION_LAG)
|
|
{
|
|
status = CHECK_STATUS_UNKNOWN;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
break;
|
|
case OM_NAGIOS:
|
|
case OM_TEXT:
|
|
appendPQExpBufferStr(&details,
|
|
"unable to query replication lag");
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
status = CHECK_STATUS_OK;
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
appendPQExpBuffer(&details,
|
|
"--lag=%i",
|
|
lag_seconds);
|
|
break;
|
|
case OM_NAGIOS:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds | lag=%i;%i;%i",
|
|
lag_seconds,
|
|
lag_seconds,
|
|
config_file_options.replication_lag_warning,
|
|
config_file_options.replication_lag_critical);
|
|
break;
|
|
case OM_TEXT:
|
|
appendPQExpBuffer(&details,
|
|
"%i seconds",
|
|
lag_seconds);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
printf("--status=%s %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
break;
|
|
case OM_NAGIOS:
|
|
printf("REPMGR_REPLICATION_LAG %s: %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Replication lag",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
PQExpBufferData details;
|
|
RecoveryType recovery_type = get_recovery_type(conn);
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --role option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
switch (node_info->type)
|
|
{
|
|
case PRIMARY:
|
|
if (recovery_type == RECTYPE_STANDBY)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
appendPQExpBufferStr(&details,
|
|
_("node is registered as primary but running as standby"));
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("node is primary"));
|
|
}
|
|
break;
|
|
case STANDBY:
|
|
if (recovery_type == RECTYPE_PRIMARY)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
appendPQExpBufferStr(&details,
|
|
_("node is registered as standby but running as primary"));
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("node is standby"));
|
|
}
|
|
break;
|
|
case WITNESS:
|
|
if (recovery_type == RECTYPE_STANDBY)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
appendPQExpBufferStr(&details,
|
|
_("node is registered as witness but running as standby"));
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("node is witness"));
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_NAGIOS:
|
|
printf("REPMGR_SERVER_ROLE %s: %s\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Server role",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --slots option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
if (node_info->total_replication_slots == 0)
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("node has no physical replication slots"));
|
|
}
|
|
else if (node_info->inactive_replication_slots == 0)
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("%i of %i physical replication slots are active"),
|
|
node_info->total_replication_slots,
|
|
node_info->total_replication_slots);
|
|
}
|
|
else if (node_info->inactive_replication_slots > 0)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
|
|
appendPQExpBuffer(&details,
|
|
_("%i of %i physical replication slots are inactive"),
|
|
node_info->inactive_replication_slots,
|
|
node_info->total_replication_slots);
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_NAGIOS:
|
|
printf("REPMGR_INACTIVE_SLOTS %s: %s | slots=%i;%i\n",
|
|
output_check_status(status),
|
|
details.data,
|
|
node_info->total_replication_slots,
|
|
node_info->inactive_replication_slots);
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Replication slots",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
return status;
|
|
}
|
|
|
|
|
|
static CheckStatus
|
|
do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
PQExpBufferData details;
|
|
NodeInfoList missing_slots = T_NODE_INFO_LIST_INITIALIZER;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --missing-slots option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
get_downstream_nodes_with_missing_slot(conn,
|
|
config_file_options.node_id,
|
|
&missing_slots);
|
|
|
|
if (missing_slots.node_count == 0)
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("node has no missing physical replication slots"));
|
|
}
|
|
else
|
|
{
|
|
NodeInfoListCell *missing_slot_cell = NULL;
|
|
bool first_element = true;
|
|
|
|
status = CHECK_STATUS_CRITICAL;
|
|
|
|
appendPQExpBuffer(&details,
|
|
_("%i physical replication slots are missing"),
|
|
missing_slots.node_count);
|
|
|
|
if (missing_slots.node_count)
|
|
{
|
|
appendPQExpBufferStr(&details, ": ");
|
|
|
|
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
|
{
|
|
if (first_element == true)
|
|
{
|
|
first_element = false;
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBufferStr(&details, ", ");
|
|
}
|
|
|
|
appendPQExpBufferStr(&details, missing_slot_cell->node_info->slot_name);
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_NAGIOS:
|
|
{
|
|
printf("REPMGR_MISSING_SLOTS %s: %s | missing_slots=%i",
|
|
output_check_status(status),
|
|
details.data,
|
|
missing_slots.node_count);
|
|
|
|
if (missing_slots.node_count)
|
|
{
|
|
NodeInfoListCell *missing_slot_cell = NULL;
|
|
bool first_element = true;
|
|
|
|
printf(";");
|
|
|
|
for (missing_slot_cell = missing_slots.head; missing_slot_cell; missing_slot_cell = missing_slot_cell->next)
|
|
{
|
|
if (first_element == true)
|
|
{
|
|
first_element = false;
|
|
}
|
|
else
|
|
{
|
|
printf(",");
|
|
}
|
|
printf("%s", missing_slot_cell->node_info->slot_name);
|
|
}
|
|
}
|
|
printf("\n");
|
|
break;
|
|
}
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Missing physical replication slots",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
clear_node_info_list(&missing_slots);
|
|
|
|
termPQExpBuffer(&details);
|
|
return status;
|
|
}
|
|
|
|
CheckStatus
|
|
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
char actual_data_directory[MAXPGPATH] = "";
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --data-directory-config option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
/*
|
|
* Check actual data directory matches that in repmgr.conf; note this requires
|
|
* a superuser connection
|
|
*/
|
|
if (connection_has_pg_monitor_role(conn, "pg_read_all_settings") == true)
|
|
{
|
|
/* we expect to have a database connection */
|
|
if (get_pg_setting(conn, "data_directory", actual_data_directory) == false)
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("unable to determine current \"data_directory\""));
|
|
status = CHECK_STATUS_UNKNOWN;
|
|
}
|
|
|
|
if (strncmp(actual_data_directory, config_file_options.data_directory, MAXPGPATH) != 0)
|
|
{
|
|
if (mode != OM_NAGIOS)
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("configured \"data_directory\" is \"%s\"; "),
|
|
config_file_options.data_directory);
|
|
}
|
|
|
|
appendPQExpBuffer(&details,
|
|
"actual data directory is \"%s\"",
|
|
actual_data_directory);
|
|
|
|
status = CHECK_STATUS_CRITICAL;
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("configured \"data_directory\" is \"%s\""),
|
|
config_file_options.data_directory);
|
|
}
|
|
}
|
|
/*
|
|
* If no superuser connection available, sanity-check that the configuration directory looks
|
|
* like a PostgreSQL directory and hope it's the right one.
|
|
*/
|
|
else
|
|
{
|
|
if (mode == OM_TEXT)
|
|
{
|
|
log_info(_("connection is not a superuser connection, falling back to simple check"));
|
|
|
|
if (PQserverVersion(conn) >= 100000)
|
|
{
|
|
log_hint(_("provide a superuser with -S/--superuser, or add the \"%s\" user to role \"pg_read_all_settings\" or \"pg_monitor\""),
|
|
PQuser(conn));
|
|
}
|
|
}
|
|
|
|
if (is_pg_dir(config_file_options.data_directory) == false)
|
|
{
|
|
if (mode == OM_NAGIOS)
|
|
{
|
|
appendPQExpBufferStr(&details,
|
|
_("configured \"data_directory\" is not a PostgreSQL data directory"));
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("configured \"data_directory\" \"%s\" is not a PostgreSQL data directory"),
|
|
actual_data_directory);
|
|
}
|
|
|
|
status = CHECK_STATUS_CRITICAL;
|
|
}
|
|
else
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
_("configured \"data_directory\" is \"%s\""),
|
|
config_file_options.data_directory);
|
|
}
|
|
}
|
|
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
printf("--configured-data-directory=%s\n",
|
|
output_check_status(status));
|
|
break;
|
|
case OM_NAGIOS:
|
|
printf("REPMGR_DATA_DIRECTORY %s: %s",
|
|
output_check_status(status),
|
|
config_file_options.data_directory);
|
|
|
|
if (status == CHECK_STATUS_CRITICAL)
|
|
{
|
|
printf(" | %s", details.data);
|
|
}
|
|
puts("");
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"Configured data directory",
|
|
status,
|
|
details.data);
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
termPQExpBuffer(&details);
|
|
|
|
return status;
|
|
}
|
|
|
|
CheckStatus
|
|
do_node_check_repmgrd(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
|
|
if (mode == OM_CSV && list_output == NULL)
|
|
{
|
|
log_error(_("--csv output not provided with --repmgrd option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
status = get_repmgrd_status(conn);
|
|
switch (mode)
|
|
{
|
|
case OM_OPTFORMAT:
|
|
printf("--repmgrd=%s\n",
|
|
output_check_status(status));
|
|
break;
|
|
case OM_NAGIOS:
|
|
printf("REPMGRD %s: %s\n",
|
|
output_check_status(status),
|
|
output_repmgrd_status(status));
|
|
|
|
break;
|
|
case OM_CSV:
|
|
case OM_TEXT:
|
|
if (list_output != NULL)
|
|
{
|
|
check_status_list_set(list_output,
|
|
"repmgrd",
|
|
status,
|
|
output_repmgrd_status(status));
|
|
}
|
|
else
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
output_repmgrd_status(status));
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/*
|
|
* This is not included in the general list output
|
|
*/
|
|
static
|
|
CheckStatus do_node_check_replication_config_owner(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
|
|
PQExpBufferData errmsg;
|
|
PQExpBufferData details;
|
|
|
|
if (mode != OM_OPTFORMAT)
|
|
{
|
|
log_error(_("--replication-config-owner option can only be used with --optformat"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&errmsg);
|
|
initPQExpBuffer(&details);
|
|
|
|
if (check_replication_config_owner(PQserverVersion(conn),
|
|
config_file_options.data_directory,
|
|
&errmsg, &details) == false)
|
|
{
|
|
status = CHECK_STATUS_CRITICAL;
|
|
}
|
|
|
|
printf("--replication-config-owner=%s\n",
|
|
output_check_status(status));
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
/*
|
|
* This is not included in the general list output
|
|
*/
|
|
static CheckStatus
|
|
do_node_check_db_connection(PGconn *conn, OutputMode mode)
|
|
{
|
|
CheckStatus status = CHECK_STATUS_OK;
|
|
PQExpBufferData details;
|
|
|
|
if (mode == OM_CSV)
|
|
{
|
|
log_error(_("--csv output not provided with --db-connection option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/* This check is for configuration diagnostics only */
|
|
if (mode == OM_NAGIOS)
|
|
{
|
|
log_error(_("--nagios output not provided with --db-connection option"));
|
|
PQfinish(conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
initPQExpBuffer(&details);
|
|
|
|
if (PQstatus(conn) != CONNECTION_OK)
|
|
{
|
|
t_conninfo_param_list conninfo = T_CONNINFO_PARAM_LIST_INITIALIZER;
|
|
int c;
|
|
|
|
status = CHECK_STATUS_CRITICAL;
|
|
initialize_conninfo_params(&conninfo, false);
|
|
conn_to_param_list(conn, &conninfo);
|
|
|
|
appendPQExpBufferStr(&details,
|
|
"connection parameters used:");
|
|
for (c = 0; c < conninfo.size && conninfo.keywords[c] != NULL; c++)
|
|
{
|
|
if (conninfo.values[c] != NULL && conninfo.values[c][0] != '\0')
|
|
{
|
|
appendPQExpBuffer(&details,
|
|
" %s=%s",
|
|
conninfo.keywords[c], conninfo.values[c]);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (mode == OM_OPTFORMAT)
|
|
{
|
|
printf("--db-connection=%s\n",
|
|
output_check_status(status));
|
|
}
|
|
else if (mode == OM_TEXT)
|
|
{
|
|
printf("%s (%s)\n",
|
|
output_check_status(status),
|
|
details.data);
|
|
}
|
|
termPQExpBuffer(&details);
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
void
|
|
do_node_service(void)
|
|
{
|
|
t_server_action action = ACTION_UNKNOWN;
|
|
char data_dir[MAXPGPATH] = "";
|
|
char command[MAXLEN] = "";
|
|
PQExpBufferData output;
|
|
|
|
action = parse_server_action(runtime_options.action);
|
|
|
|
if (action == ACTION_UNKNOWN)
|
|
{
|
|
log_error(_("unknown value \"%s\" provided for parameter --action"),
|
|
runtime_options.action);
|
|
log_hint(_("valid values are \"start\", \"stop\", \"restart\", \"reload\" and \"promote\""));
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (runtime_options.list_actions == true)
|
|
{
|
|
return _do_node_service_list_actions(action);
|
|
}
|
|
|
|
|
|
if (data_dir_required_for_action(action))
|
|
{
|
|
get_node_config_directory(data_dir);
|
|
|
|
if (data_dir[0] == '\0')
|
|
{
|
|
log_error(_("unable to determine data directory for action"));
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
}
|
|
|
|
|
|
if ((action == ACTION_STOP || action == ACTION_RESTART) && runtime_options.checkpoint == true)
|
|
{
|
|
PGconn *conn = NULL;
|
|
|
|
if (config_file_options.conninfo[0] != '\0')
|
|
{
|
|
/*
|
|
* If --superuser option provided, attempt to connect as the specified user
|
|
*/
|
|
if (runtime_options.superuser[0] != '\0')
|
|
{
|
|
conn = establish_db_connection_with_replacement_param(
|
|
config_file_options.conninfo,
|
|
"user",
|
|
runtime_options.superuser,
|
|
true);
|
|
}
|
|
else
|
|
{
|
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
conn = establish_db_connection_by_params(&source_conninfo, true);
|
|
}
|
|
|
|
if (can_execute_checkpoint(conn) == false)
|
|
{
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_warning(_("a CHECKPOINT would be issued here but no authorized connection is available"));
|
|
}
|
|
else
|
|
{
|
|
log_warning(_("an authorized connection is required to issue a CHECKPOINT"));
|
|
}
|
|
|
|
if (PQserverVersion(conn) >= 150000)
|
|
{
|
|
log_hint(_("provide a superuser with -S/--superuser or grant pg_checkpoint role to repmgr user"));
|
|
}
|
|
else
|
|
{
|
|
log_hint(_("provide a superuser with -S/--superuser"));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info(_("a CHECKPOINT would be issued here"));
|
|
}
|
|
else
|
|
{
|
|
|
|
log_notice(_("issuing CHECKPOINT on node \"%s\" (ID: %i) "),
|
|
config_file_options.node_name,
|
|
config_file_options.node_id);
|
|
|
|
checkpoint(conn);
|
|
}
|
|
}
|
|
|
|
PQfinish(conn);
|
|
}
|
|
|
|
get_server_action(action, command, data_dir);
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info(_("would execute server command \"%s\""), command);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* log level is "DETAIL" here as this command is intended to be executed
|
|
* by another repmgr process (e.g. during standby switchover); that repmgr
|
|
* should emit a "NOTICE" about the intent of the command.
|
|
*/
|
|
log_detail(_("executing server command \"%s\""), command);
|
|
|
|
initPQExpBuffer(&output);
|
|
|
|
if (local_command(command, &output) == false)
|
|
{
|
|
termPQExpBuffer(&output);
|
|
exit(ERR_LOCAL_COMMAND);
|
|
}
|
|
|
|
termPQExpBuffer(&output);
|
|
}
|
|
|
|
|
|
static void
|
|
_do_node_service_list_actions(t_server_action action)
|
|
{
|
|
char command[MAXLEN] = "";
|
|
|
|
char data_dir[MAXPGPATH] = "";
|
|
|
|
bool data_dir_required = false;
|
|
|
|
/* do we need to provide a data directory for any of the actions? */
|
|
if (data_dir_required_for_action(ACTION_START))
|
|
data_dir_required = true;
|
|
|
|
if (data_dir_required_for_action(ACTION_STOP))
|
|
data_dir_required = true;
|
|
|
|
if (data_dir_required_for_action(ACTION_RESTART))
|
|
data_dir_required = true;
|
|
|
|
if (data_dir_required_for_action(ACTION_RELOAD))
|
|
data_dir_required = true;
|
|
|
|
if (data_dir_required_for_action(ACTION_PROMOTE))
|
|
data_dir_required = true;
|
|
|
|
if (data_dir_required == true)
|
|
{
|
|
get_node_config_directory(data_dir);
|
|
}
|
|
|
|
/* show command for specific action only */
|
|
if (action != ACTION_NONE)
|
|
{
|
|
get_server_action(action, command, data_dir);
|
|
printf("%s\n", command);
|
|
return;
|
|
}
|
|
|
|
puts(_("Following commands would be executed for each action:"));
|
|
puts("");
|
|
|
|
get_server_action(ACTION_START, command, data_dir);
|
|
printf(" start: \"%s\"\n", command);
|
|
|
|
get_server_action(ACTION_STOP, command, data_dir);
|
|
printf(" stop: \"%s\"\n", command);
|
|
|
|
get_server_action(ACTION_RESTART, command, data_dir);
|
|
printf(" restart: \"%s\"\n", command);
|
|
|
|
get_server_action(ACTION_RELOAD, command, data_dir);
|
|
printf(" reload: \"%s\"\n", command);
|
|
|
|
get_server_action(ACTION_PROMOTE, command, data_dir);
|
|
printf(" promote: \"%s\"\n", command);
|
|
|
|
puts("");
|
|
|
|
}
|
|
|
|
|
|
static t_server_action
|
|
parse_server_action(const char *action_name)
|
|
{
|
|
if (action_name[0] == '\0')
|
|
return ACTION_NONE;
|
|
|
|
if (strcasecmp(action_name, "start") == 0)
|
|
return ACTION_START;
|
|
|
|
if (strcasecmp(action_name, "stop") == 0)
|
|
return ACTION_STOP;
|
|
|
|
if (strcasecmp(action_name, "restart") == 0)
|
|
return ACTION_RESTART;
|
|
|
|
if (strcasecmp(action_name, "reload") == 0)
|
|
return ACTION_RELOAD;
|
|
|
|
if (strcasecmp(action_name, "promote") == 0)
|
|
return ACTION_PROMOTE;
|
|
|
|
return ACTION_UNKNOWN;
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* Rejoin a dormant (shut down) node to the replication cluster; this
|
|
* is typically a former primary which needs to be demoted to a standby.
|
|
*
|
|
* Note that "repmgr node rejoin" is also executed by
|
|
* "repmgr standby switchover" after promoting the new primary.
|
|
*
|
|
* Parameters:
|
|
* --dry-run
|
|
* --force-rewind[=VALUE]
|
|
* --config-files
|
|
* --config-archive-dir
|
|
* -W/--no-wait
|
|
*/
|
|
void
|
|
do_node_rejoin(void)
|
|
{
|
|
PGconn *upstream_conn = NULL;
|
|
RecoveryType primary_recovery_type = RECTYPE_UNKNOWN;
|
|
PGconn *primary_conn = NULL;
|
|
|
|
DBState db_state;
|
|
PGPing status;
|
|
bool is_shutdown = true;
|
|
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
|
|
bool hide_standby_signal = false;
|
|
|
|
KeyValueListCell *cell = NULL;
|
|
PQExpBufferData command;
|
|
PQExpBufferData command_output;
|
|
PQExpBufferData follow_output;
|
|
struct stat statbuf;
|
|
t_node_info primary_node_record = T_NODE_INFO_INITIALIZER;
|
|
t_node_info local_node_record = T_NODE_INFO_INITIALIZER;
|
|
|
|
bool success = true;
|
|
int follow_error_code = SUCCESS;
|
|
|
|
/* check node is not actually running */
|
|
status = PQping(config_file_options.conninfo);
|
|
|
|
switch (status)
|
|
{
|
|
case PQPING_NO_ATTEMPT:
|
|
log_error(_("unable to determine status of server"));
|
|
exit(ERR_BAD_CONFIG);
|
|
case PQPING_OK:
|
|
is_shutdown = false;
|
|
break;
|
|
case PQPING_REJECT:
|
|
is_shutdown = false;
|
|
break;
|
|
case PQPING_NO_RESPONSE:
|
|
/* status not yet clear */
|
|
break;
|
|
}
|
|
|
|
if (get_db_state(config_file_options.data_directory, &db_state) == false)
|
|
{
|
|
log_error(_("unable to determine database state from pg_control"));
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (is_shutdown == false)
|
|
{
|
|
log_error(_("database is still running in state \"%s\""),
|
|
describe_db_state(db_state));
|
|
log_hint(_("\"repmgr node rejoin\" cannot be executed on a running node"));
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
|
|
/*
|
|
* Server version number required to determine whether pg_rewind will run
|
|
* crash recovery (Pg 13 and later).
|
|
*/
|
|
server_version_num = get_pg_version(config_file_options.data_directory, NULL);
|
|
|
|
if (server_version_num == UNKNOWN_SERVER_VERSION_NUM)
|
|
{
|
|
/* This is very unlikely to happen */
|
|
log_error(_("unable to determine database version"));
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
log_verbose(LOG_DEBUG, "server version number is: %i", server_version_num);
|
|
|
|
/* check if cleanly shut down */
|
|
if (db_state != DB_SHUTDOWNED && db_state != DB_SHUTDOWNED_IN_RECOVERY)
|
|
{
|
|
if (db_state == DB_SHUTDOWNING)
|
|
{
|
|
log_error(_("database is still shutting down"));
|
|
}
|
|
else if (server_version_num >= 130000 && runtime_options.force_rewind_used == true)
|
|
{
|
|
log_warning(_("database is not shut down cleanly"));
|
|
log_detail(_("--force-rewind provided, pg_rewind will automatically perform recovery"));
|
|
|
|
/*
|
|
* If pg_rewind is executed, the first change it will make
|
|
* is to start the server in single user mode, which will fail
|
|
* in the presence of "standby.signal", so we'll "hide" it
|
|
* (actually delete and recreate).
|
|
*/
|
|
hide_standby_signal = true;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* If the database was not shut down cleanly, it *might* rejoin correctly
|
|
* after starting up and recovering, but better to ensure the database
|
|
* can recover before trying anything else.
|
|
*/
|
|
log_error(_("database is not shut down cleanly"));
|
|
|
|
if (server_version_num >= 130000)
|
|
{
|
|
log_hint(_("provide --force-rewind to run recovery"));
|
|
}
|
|
else
|
|
{
|
|
if (runtime_options.force_rewind_used == true)
|
|
{
|
|
log_detail(_("pg_rewind will not be able to run"));
|
|
}
|
|
log_hint(_("database should be restarted then shut down cleanly after crash recovery completes"));
|
|
}
|
|
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
}
|
|
|
|
/* check provided upstream connection */
|
|
upstream_conn = establish_db_connection_by_params(&source_conninfo, true);
|
|
|
|
if (get_primary_node_record(upstream_conn, &primary_node_record) == false)
|
|
{
|
|
log_error(_("unable to retrieve primary node record"));
|
|
log_hint(_("check the provided database connection string is for a \"repmgr\" database"));
|
|
PQfinish(upstream_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/*
|
|
* Emit a notice about the identity of the rejoin target
|
|
*/
|
|
log_notice(_("rejoin target is node \"%s\" (ID: %i)"),
|
|
primary_node_record.node_name,
|
|
primary_node_record.node_id);
|
|
|
|
/* connect to registered primary and check it's not in recovery */
|
|
primary_conn = establish_db_connection(primary_node_record.conninfo, false);
|
|
|
|
if (PQstatus(primary_conn) != CONNECTION_OK)
|
|
{
|
|
RecoveryType upstream_recovery_type = get_recovery_type(upstream_conn);
|
|
|
|
log_error(_("unable to connect to current registered primary \"%s\" (ID: %i)"),
|
|
primary_node_record.node_name,
|
|
primary_node_record.node_id);
|
|
log_detail(_("registered primary node conninfo is: \"%s\""),
|
|
primary_node_record.conninfo);
|
|
/*
|
|
* Catch case where provided upstream is not in recovery, but is also
|
|
* not registered as primary
|
|
*/
|
|
|
|
if (upstream_recovery_type == RECTYPE_PRIMARY)
|
|
{
|
|
log_warning(_("provided upstream connection string is for a server which is not in recovery, but not registered as primary"));
|
|
log_hint(_("fix repmgr metadata configuration before continuing"));
|
|
}
|
|
|
|
PQfinish(upstream_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
PQfinish(upstream_conn);
|
|
|
|
primary_recovery_type = get_recovery_type(primary_conn);
|
|
|
|
if (primary_recovery_type != RECTYPE_PRIMARY)
|
|
{
|
|
log_error(_("primary server is registered as node \"%s\" (ID: %i), but server is not a primary"),
|
|
primary_node_record.node_name,
|
|
primary_node_record.node_id);
|
|
/* TODO: hint about checking cluster */
|
|
PQfinish(primary_conn);
|
|
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/*
|
|
* Fetch the local node record - we'll need this later, and it acts as an
|
|
* additional sanity-check that the node is known to the primary.
|
|
*/
|
|
if (get_node_record(primary_conn, config_file_options.node_id, &local_node_record) != RECORD_FOUND)
|
|
{
|
|
log_error(_("unable to retrieve node record for the local node"));
|
|
log_hint(_("check the local node is registered with the current primary \"%s\" (ID: %i)"),
|
|
primary_node_record.node_name,
|
|
primary_node_record.node_id);
|
|
|
|
PQfinish(primary_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/*
|
|
* Sanity-check replication slot availability
|
|
*/
|
|
if (config_file_options.use_replication_slots)
|
|
{
|
|
bool slots_available = check_replication_slots_available(primary_node_record.node_id,
|
|
primary_conn);
|
|
if (slots_available == false)
|
|
{
|
|
PQfinish(primary_conn);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* sanity-check that it will actually be possible to stream from the new upstream
|
|
*/
|
|
{
|
|
bool can_rejoin;
|
|
TimeLineID tli = get_min_recovery_end_timeline(config_file_options.data_directory);
|
|
XLogRecPtr min_recovery_location = get_min_recovery_location(config_file_options.data_directory);
|
|
|
|
/*
|
|
* It's possible this was a former primary, so the minRecoveryPoint*
|
|
* fields may be empty.
|
|
*/
|
|
|
|
if (min_recovery_location == InvalidXLogRecPtr)
|
|
min_recovery_location = get_latest_checkpoint_location(config_file_options.data_directory);
|
|
if (tli == 0)
|
|
tli = get_timeline(config_file_options.data_directory);
|
|
|
|
can_rejoin = check_node_can_attach(tli,
|
|
min_recovery_location,
|
|
primary_conn,
|
|
&primary_node_record,
|
|
true);
|
|
|
|
if (can_rejoin == false)
|
|
{
|
|
PQfinish(primary_conn);
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* --force-rewind specified - check prerequisites, and attempt to execute
|
|
* (if --dry-run provided, just output the command which would be executed)
|
|
*/
|
|
|
|
if (runtime_options.force_rewind_used == true)
|
|
{
|
|
PQExpBufferData msg;
|
|
PQExpBufferData filebuf;
|
|
int ret;
|
|
|
|
/*
|
|
* Check that pg_rewind can be used
|
|
*/
|
|
|
|
initPQExpBuffer(&msg);
|
|
|
|
if (can_use_pg_rewind(primary_conn, config_file_options.data_directory, &msg) == false)
|
|
{
|
|
log_error(_("--force-rewind specified but pg_rewind cannot be used"));
|
|
log_detail("%s", msg.data);
|
|
termPQExpBuffer(&msg);
|
|
PQfinish(primary_conn);
|
|
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
appendPQExpBufferStr(&msg,
|
|
_("prerequisites for using pg_rewind are met"));
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info("%s", msg.data);
|
|
}
|
|
else
|
|
{
|
|
log_verbose(LOG_INFO, "%s", msg.data);
|
|
}
|
|
termPQExpBuffer(&msg);
|
|
|
|
/*
|
|
* Archive requested configuration files.
|
|
*
|
|
* In --dry-run mode this acts as a check that the files can be archived, though
|
|
* errors will only be logged; any copied files will be deleted and --dry-run
|
|
* execution will continue.
|
|
*/
|
|
_do_node_archive_config();
|
|
|
|
/* execute pg_rewind */
|
|
initPQExpBuffer(&command);
|
|
|
|
if (runtime_options.force_rewind_path[0] != '\0')
|
|
{
|
|
appendPQExpBuffer(&command,
|
|
"%s -D ",
|
|
runtime_options.force_rewind_path);
|
|
}
|
|
else
|
|
{
|
|
make_pg_path(&command, "pg_rewind");
|
|
appendPQExpBufferStr(&command,
|
|
" -D ");
|
|
}
|
|
|
|
appendShellString(&command,
|
|
config_file_options.data_directory);
|
|
|
|
appendPQExpBuffer(&command,
|
|
" --source-server='%s'",
|
|
primary_node_record.conninfo);
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info(_("pg_rewind would now be executed"));
|
|
log_detail(_("pg_rewind command is:\n %s"),
|
|
command.data);
|
|
}
|
|
else
|
|
{
|
|
log_notice(_("executing pg_rewind"));
|
|
log_detail(_("pg_rewind command is \"%s\""),
|
|
command.data);
|
|
|
|
/*
|
|
* In Pg13 and later, pg_rewind will attempt to start up a server which
|
|
* was not cleanly shut down in single user mode. This will fail if
|
|
* "standby.signal" is present. We'll remove it and restore it after
|
|
* pg_rewind runs.
|
|
*/
|
|
if (hide_standby_signal == true)
|
|
{
|
|
char standby_signal_file_path[MAXPGPATH] = "";
|
|
|
|
log_notice(_("temporarily removing \"standby.signal\""));
|
|
log_detail(_("this is required so pg_rewind can fix the unclean shutdown"));
|
|
|
|
make_standby_signal_path(config_file_options.data_directory,
|
|
standby_signal_file_path);
|
|
|
|
if (unlink(standby_signal_file_path) < 0 && errno != ENOENT)
|
|
{
|
|
log_error(_("unable to remove \"standby.signal\" file in data directory \"%s\""),
|
|
standby_signal_file_path);
|
|
log_detail("%s", strerror(errno));
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
}
|
|
|
|
initPQExpBuffer(&command_output);
|
|
|
|
ret = local_command(command.data,
|
|
&command_output);
|
|
|
|
termPQExpBuffer(&command);
|
|
|
|
if (hide_standby_signal == true)
|
|
{
|
|
/*
|
|
* Restore standby.signal if we previously removed it, regardless
|
|
* of whether the pg_rewind operation failed.
|
|
*/
|
|
log_notice(_("recreating \"standby.signal\""));
|
|
write_standby_signal(config_file_options.data_directory);
|
|
}
|
|
|
|
if (ret == false)
|
|
{
|
|
log_error(_("pg_rewind execution failed"));
|
|
log_detail("%s", command_output.data);
|
|
|
|
termPQExpBuffer(&command_output);
|
|
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
|
|
termPQExpBuffer(&command_output);
|
|
|
|
/* Restore any previously archived config files */
|
|
_do_node_restore_config();
|
|
|
|
initPQExpBuffer(&filebuf);
|
|
|
|
/* remove any recovery.done file copied in by pg_rewind */
|
|
appendPQExpBuffer(&filebuf,
|
|
"%s/recovery.done",
|
|
config_file_options.data_directory);
|
|
|
|
if (stat(filebuf.data, &statbuf) == 0)
|
|
{
|
|
log_verbose(LOG_INFO, _("deleting \"recovery.done\""));
|
|
|
|
if (unlink(filebuf.data) == -1)
|
|
{
|
|
log_warning(_("unable to delete \"%s\""),
|
|
filebuf.data);
|
|
log_detail("%s", strerror(errno));
|
|
}
|
|
}
|
|
termPQExpBuffer(&filebuf);
|
|
|
|
/*
|
|
* Delete any replication slots copied in by pg_rewind.
|
|
*
|
|
* TODO:
|
|
* - from PostgreSQL 11, this will be handled by pg_rewind, so
|
|
* we can skip this step from that version; see commit
|
|
* 266b6acb312fc440c1c1a2036aa9da94916beac6
|
|
* - possibly delete contents of various other directories
|
|
* as per the above commit for pre-PostgreSQL 11
|
|
*/
|
|
{
|
|
PQExpBufferData slotdir_path;
|
|
DIR *slotdir;
|
|
struct dirent *slotdir_ent;
|
|
|
|
initPQExpBuffer(&slotdir_path);
|
|
|
|
appendPQExpBuffer(&slotdir_path,
|
|
"%s/pg_replslot",
|
|
config_file_options.data_directory);
|
|
|
|
slotdir = opendir(slotdir_path.data);
|
|
|
|
if (slotdir == NULL)
|
|
{
|
|
log_warning(_("unable to open replication slot directory \"%s\""),
|
|
slotdir_path.data);
|
|
log_detail("%s", strerror(errno));
|
|
}
|
|
else
|
|
{
|
|
while ((slotdir_ent = readdir(slotdir)) != NULL) {
|
|
struct stat local_statbuf;
|
|
PQExpBufferData slotdir_ent_path;
|
|
|
|
if (strcmp(slotdir_ent->d_name, ".") == 0 || strcmp(slotdir_ent->d_name, "..") == 0)
|
|
continue;
|
|
|
|
initPQExpBuffer(&slotdir_ent_path);
|
|
|
|
appendPQExpBuffer(&slotdir_ent_path,
|
|
"%s/%s",
|
|
slotdir_path.data,
|
|
slotdir_ent->d_name);
|
|
|
|
if (stat(slotdir_ent_path.data, &local_statbuf) == 0 && !S_ISDIR(local_statbuf.st_mode))
|
|
{
|
|
termPQExpBuffer(&slotdir_ent_path);
|
|
continue;
|
|
}
|
|
|
|
log_debug("deleting slot directory \"%s\"", slotdir_ent_path.data);
|
|
if (rmdir_recursive(slotdir_ent_path.data) != 0 && errno != EEXIST)
|
|
{
|
|
log_warning(_("unable to delete replication slot directory \"%s\""), slotdir_ent_path.data);
|
|
log_detail("%s", strerror(errno));
|
|
log_hint(_("directory may need to be manually removed"));
|
|
}
|
|
|
|
termPQExpBuffer(&slotdir_ent_path);
|
|
}
|
|
|
|
closedir(slotdir);
|
|
}
|
|
termPQExpBuffer(&slotdir_path);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info(_("prerequisites for executing NODE REJOIN are met"));
|
|
exit(SUCCESS);
|
|
}
|
|
|
|
initPQExpBuffer(&follow_output);
|
|
|
|
/*
|
|
* do_standby_follow_internal() can handle situations where the follow
|
|
* target is not the primary, so requires database handles to both
|
|
* (even if they point to the same node). For the time being,
|
|
* "node rejoin" will only attach a standby to the primary.
|
|
*/
|
|
success = do_standby_follow_internal(primary_conn,
|
|
primary_conn,
|
|
&primary_node_record,
|
|
&follow_output,
|
|
ERR_REJOIN_FAIL,
|
|
&follow_error_code);
|
|
|
|
if (success == false)
|
|
{
|
|
log_error(_("NODE REJOIN failed"));
|
|
|
|
if (strlen(follow_output.data))
|
|
log_detail("%s", follow_output.data);
|
|
|
|
create_event_notification(primary_conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"node_rejoin",
|
|
success,
|
|
follow_output.data);
|
|
|
|
PQfinish(primary_conn);
|
|
|
|
termPQExpBuffer(&follow_output);
|
|
exit(follow_error_code);
|
|
}
|
|
|
|
/*
|
|
* Actively check that node actually started and connected to primary,
|
|
* if not exit with ERR_REJOIN_FAIL.
|
|
*
|
|
* This check can be overridden with -W/--no-wait, in which case a one-time
|
|
* check will be carried out.
|
|
*/
|
|
if (runtime_options.no_wait == false)
|
|
{
|
|
standy_join_status join_success = check_standby_join(primary_conn,
|
|
&primary_node_record,
|
|
&local_node_record);
|
|
|
|
create_event_notification(primary_conn,
|
|
&config_file_options,
|
|
config_file_options.node_id,
|
|
"node_rejoin",
|
|
join_success == JOIN_SUCCESS ? true : false,
|
|
follow_output.data);
|
|
|
|
if (join_success != JOIN_SUCCESS)
|
|
{
|
|
termPQExpBuffer(&follow_output);
|
|
log_error(_("NODE REJOIN failed"));
|
|
|
|
if (join_success == JOIN_FAIL_NO_PING) {
|
|
log_detail(_("local node \"%s\" did not become available start after %i seconds"),
|
|
config_file_options.node_name,
|
|
config_file_options.node_rejoin_timeout);
|
|
}
|
|
else {
|
|
log_detail(_("no active record for local node \"%s\" found in node \"%s\"'s \"pg_stat_replication\" table"),
|
|
config_file_options.node_name,
|
|
primary_node_record.node_name);
|
|
}
|
|
log_hint(_("check the PostgreSQL log on the local node"));
|
|
|
|
exit(ERR_REJOIN_FAIL);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* -W/--no-wait provided - check once */
|
|
NodeAttached node_attached = is_downstream_node_attached(primary_conn, config_file_options.node_name, NULL);
|
|
if (node_attached == NODE_ATTACHED)
|
|
success = true;
|
|
}
|
|
|
|
/*
|
|
* Handle replication slots:
|
|
* - if a slot for the new upstream exists, delete that
|
|
* - warn about any other inactive replication slots
|
|
*/
|
|
if (runtime_options.force_rewind_used == false && config_file_options.use_replication_slots)
|
|
{
|
|
PGconn *local_conn = NULL;
|
|
local_conn = establish_db_connection(config_file_options.conninfo, false);
|
|
|
|
if (PQstatus(local_conn) != CONNECTION_OK)
|
|
{
|
|
log_warning(_("unable to connect to local node to check replication slot status"));
|
|
log_hint(_("execute \"repmgr node check\" to check inactive slots and drop manually if necessary"));
|
|
}
|
|
else
|
|
{
|
|
KeyValueList inactive_replication_slots = {NULL, NULL};
|
|
int inactive_count = 0;
|
|
PQExpBufferData slotinfo;
|
|
|
|
drop_replication_slot_if_exists(local_conn,
|
|
config_file_options.node_id,
|
|
primary_node_record.slot_name);
|
|
|
|
(void) get_inactive_replication_slots(local_conn, &inactive_replication_slots);
|
|
|
|
initPQExpBuffer(&slotinfo);
|
|
for (cell = inactive_replication_slots.head; cell; cell = cell->next)
|
|
{
|
|
appendPQExpBuffer(&slotinfo,
|
|
" - %s (%s)", cell->key, cell->value);
|
|
inactive_count++;
|
|
}
|
|
|
|
if (inactive_count > 0)
|
|
{
|
|
log_warning(_("%i inactive replication slots detected"), inactive_count);
|
|
log_detail(_("inactive replication slots:\n%s"), slotinfo.data);
|
|
log_hint(_("these replication slots may need to be removed manually"));
|
|
}
|
|
|
|
termPQExpBuffer(&slotinfo);
|
|
|
|
PQfinish(local_conn);
|
|
}
|
|
}
|
|
|
|
if (success == true)
|
|
{
|
|
log_notice(_("NODE REJOIN successful"));
|
|
log_detail("%s", follow_output.data);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* if we reach here, no record found in upstream node's pg_stat_replication
|
|
*/
|
|
log_notice(_("NODE REJOIN has completed but node is not yet reattached to upstream"));
|
|
log_hint(_("you will need to manually check the node's replication status"));
|
|
}
|
|
termPQExpBuffer(&follow_output);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*
|
|
* Currently for testing purposes only, not documented;
|
|
* use at own risk!
|
|
*/
|
|
|
|
void
|
|
do_node_control(void)
|
|
{
|
|
PGconn *conn = NULL;
|
|
pid_t wal_receiver_pid = UNKNOWN_PID;
|
|
conn = establish_db_connection(config_file_options.conninfo, true);
|
|
|
|
if (runtime_options.disable_wal_receiver == true)
|
|
{
|
|
wal_receiver_pid = disable_wal_receiver(conn);
|
|
|
|
PQfinish(conn);
|
|
|
|
if (wal_receiver_pid == UNKNOWN_PID)
|
|
exit(ERR_BAD_CONFIG);
|
|
|
|
exit(SUCCESS);
|
|
}
|
|
|
|
if (runtime_options.enable_wal_receiver == true)
|
|
{
|
|
wal_receiver_pid = enable_wal_receiver(conn, true);
|
|
|
|
PQfinish(conn);
|
|
|
|
if (wal_receiver_pid == UNKNOWN_PID)
|
|
exit(ERR_BAD_CONFIG);
|
|
|
|
exit(SUCCESS);
|
|
}
|
|
|
|
log_error(_("no option provided"));
|
|
|
|
PQfinish(conn);
|
|
}
|
|
|
|
|
|
/*
|
|
* For "internal" use by `node rejoin` on the local node when
|
|
* called by "standby switchover" from the remote node.
|
|
*
|
|
* This archives any configuration files in the data directory, which may be
|
|
* overwritten by pg_rewind.
|
|
*
|
|
* Requires configuration file, optionally --config-archive-dir
|
|
*/
|
|
static void
|
|
_do_node_archive_config(void)
|
|
{
|
|
PQExpBufferData archive_dir;
|
|
struct stat statbuf;
|
|
struct dirent *arcdir_ent;
|
|
DIR *arcdir;
|
|
|
|
KeyValueList config_files = {NULL, NULL};
|
|
KeyValueListCell *cell = NULL;
|
|
int copied_count = 0;
|
|
|
|
initPQExpBuffer(&archive_dir);
|
|
format_archive_dir(&archive_dir);
|
|
|
|
/* sanity-check directory path */
|
|
if (stat(archive_dir.data, &statbuf) == -1)
|
|
{
|
|
if (errno != ENOENT)
|
|
{
|
|
log_error(_("error encountered when checking archive directory \"%s\""),
|
|
archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
termPQExpBuffer(&archive_dir);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
/* attempt to create and open the directory */
|
|
if (mkdir(archive_dir.data, S_IRWXU) != 0 && errno != EEXIST)
|
|
{
|
|
log_error(_("unable to create temporary archive directory \"%s\""),
|
|
archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
termPQExpBuffer(&archive_dir);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_verbose(LOG_INFO, "temporary archive directory \"%s\" created", archive_dir.data);
|
|
}
|
|
}
|
|
else if (!S_ISDIR(statbuf.st_mode))
|
|
{
|
|
log_error(_("\"%s\" exists but is not a directory"),
|
|
archive_dir.data);
|
|
termPQExpBuffer(&archive_dir);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
arcdir = opendir(archive_dir.data);
|
|
|
|
/* always attempt to open the directory */
|
|
if (arcdir == NULL)
|
|
{
|
|
log_error(_("unable to open archive directory \"%s\""),
|
|
archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
termPQExpBuffer(&archive_dir);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
if (runtime_options.dry_run == false)
|
|
{
|
|
|
|
/*
|
|
* attempt to remove any existing files in the directory
|
|
* TODO: collate problem files into list
|
|
*/
|
|
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
|
{
|
|
PQExpBufferData arcdir_ent_path;
|
|
|
|
initPQExpBuffer(&arcdir_ent_path);
|
|
|
|
appendPQExpBuffer(&arcdir_ent_path,
|
|
"%s/%s",
|
|
archive_dir.data,
|
|
arcdir_ent->d_name);
|
|
|
|
if (stat(arcdir_ent_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
|
{
|
|
termPQExpBuffer(&arcdir_ent_path);
|
|
continue;
|
|
}
|
|
|
|
if (unlink(arcdir_ent_path.data) == -1)
|
|
{
|
|
log_error(_("unable to delete file in temporary archive directory"));
|
|
log_detail(_("file is: \"%s\""), arcdir_ent_path.data);
|
|
log_detail("%s", strerror(errno));
|
|
closedir(arcdir);
|
|
termPQExpBuffer(&arcdir_ent_path);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
termPQExpBuffer(&arcdir_ent_path);
|
|
}
|
|
}
|
|
|
|
closedir(arcdir);
|
|
|
|
|
|
/*
|
|
* extract list of config files from --config-files
|
|
*/
|
|
{
|
|
int i = 0;
|
|
int j = 0;
|
|
int config_file_len = strlen(runtime_options.config_files);
|
|
|
|
char filenamebuf[MAXPGPATH] = "";
|
|
PQExpBufferData pathbuf;
|
|
|
|
for (j = 0; j < config_file_len; j++)
|
|
{
|
|
if (runtime_options.config_files[j] == ',')
|
|
{
|
|
int filename_len = j - i;
|
|
|
|
if (filename_len >= MAXPGPATH)
|
|
filename_len = MAXPGPATH - 1;
|
|
|
|
strncpy(filenamebuf, runtime_options.config_files + i, filename_len);
|
|
|
|
filenamebuf[filename_len] = '\0';
|
|
|
|
initPQExpBuffer(&pathbuf);
|
|
|
|
appendPQExpBuffer(&pathbuf,
|
|
"%s/%s",
|
|
config_file_options.data_directory,
|
|
filenamebuf);
|
|
|
|
key_value_list_set(&config_files,
|
|
filenamebuf,
|
|
pathbuf.data);
|
|
termPQExpBuffer(&pathbuf);
|
|
i = j + 1;
|
|
}
|
|
}
|
|
|
|
if (i < config_file_len)
|
|
{
|
|
int filename_len = config_file_len - i;
|
|
|
|
strncpy(filenamebuf, runtime_options.config_files + i, filename_len);
|
|
|
|
filenamebuf[filename_len] = '\0';
|
|
|
|
initPQExpBuffer(&pathbuf);
|
|
appendPQExpBuffer(&pathbuf,
|
|
"%s/%s",
|
|
config_file_options.data_directory,
|
|
filenamebuf);
|
|
|
|
key_value_list_set(&config_files,
|
|
filenamebuf,
|
|
pathbuf.data);
|
|
termPQExpBuffer(&pathbuf);
|
|
}
|
|
}
|
|
|
|
|
|
for (cell = config_files.head; cell; cell = cell->next)
|
|
{
|
|
PQExpBufferData dest_file;
|
|
|
|
initPQExpBuffer(&dest_file);
|
|
|
|
appendPQExpBuffer(&dest_file,
|
|
"%s/%s",
|
|
archive_dir.data,
|
|
cell->key);
|
|
|
|
if (stat(cell->value, &statbuf) == -1)
|
|
{
|
|
log_warning(_("specified file \"%s\" not found, skipping"),
|
|
cell->value);
|
|
}
|
|
else
|
|
{
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_info("file \"%s\" would be copied to \"%s\"",
|
|
cell->key, dest_file.data);
|
|
copied_count++;
|
|
}
|
|
else
|
|
{
|
|
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
|
cell->key, dest_file.data);
|
|
copy_file(cell->value, dest_file.data);
|
|
copied_count++;
|
|
}
|
|
}
|
|
|
|
termPQExpBuffer(&dest_file);
|
|
}
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
log_verbose(LOG_INFO, _("%i files would have been copied to \"%s\""),
|
|
copied_count, archive_dir.data);
|
|
}
|
|
else
|
|
{
|
|
log_verbose(LOG_INFO, _("%i files copied to \"%s\""),
|
|
copied_count, archive_dir.data);
|
|
}
|
|
|
|
if (runtime_options.dry_run == true)
|
|
{
|
|
/*
|
|
* Delete directory in --dry-run mode - it should be empty unless it's been
|
|
* interfered with for some reason, in which case manual intervention is
|
|
* required
|
|
*/
|
|
if (rmdir(archive_dir.data) != 0 && errno != EEXIST)
|
|
{
|
|
log_warning(_("unable to delete directory \"%s\""), archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
log_hint(_("directory may need to be manually removed"));
|
|
}
|
|
else
|
|
{
|
|
log_verbose(LOG_INFO, "temporary archive directory \"%s\" deleted", archive_dir.data);
|
|
}
|
|
}
|
|
|
|
termPQExpBuffer(&archive_dir);
|
|
}
|
|
|
|
|
|
/*
|
|
* Intended mainly for "internal" use by `standby switchover`, which
|
|
* calls this on the target server to restore any configuration files
|
|
* to the data directory, which may have been overwritten by an operation
|
|
* like pg_rewind
|
|
*
|
|
* Not designed to be called if the instance is running, but does
|
|
* not currently check.
|
|
*
|
|
* Requires -D/--pgdata, optionally --config-archive-dir
|
|
*
|
|
* Removes --config-archive-dir after successful copy
|
|
*/
|
|
|
|
static void
|
|
_do_node_restore_config(void)
|
|
{
|
|
PQExpBufferData archive_dir;
|
|
|
|
DIR *arcdir;
|
|
struct dirent *arcdir_ent;
|
|
int copied_count = 0;
|
|
bool copy_ok = true;
|
|
|
|
initPQExpBuffer(&archive_dir);
|
|
|
|
format_archive_dir(&archive_dir);
|
|
|
|
arcdir = opendir(archive_dir.data);
|
|
|
|
if (arcdir == NULL)
|
|
{
|
|
log_error(_("unable to open archive directory \"%s\""),
|
|
archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
termPQExpBuffer(&archive_dir);
|
|
exit(ERR_BAD_CONFIG);
|
|
}
|
|
|
|
while ((arcdir_ent = readdir(arcdir)) != NULL)
|
|
{
|
|
struct stat statbuf;
|
|
PQExpBufferData src_file_path;
|
|
PQExpBufferData dest_file_path;
|
|
|
|
initPQExpBuffer(&src_file_path);
|
|
|
|
appendPQExpBuffer(&src_file_path,
|
|
"%s/%s",
|
|
archive_dir.data,
|
|
arcdir_ent->d_name);
|
|
|
|
/* skip non-files */
|
|
if (stat(src_file_path.data, &statbuf) == 0 && !S_ISREG(statbuf.st_mode))
|
|
{
|
|
termPQExpBuffer(&src_file_path);
|
|
continue;
|
|
}
|
|
|
|
initPQExpBuffer(&dest_file_path);
|
|
|
|
appendPQExpBuffer(&dest_file_path,
|
|
"%s/%s",
|
|
config_file_options.data_directory,
|
|
arcdir_ent->d_name);
|
|
|
|
log_verbose(LOG_DEBUG, "copying \"%s\" to \"%s\"",
|
|
src_file_path.data, dest_file_path.data);
|
|
|
|
if (copy_file(src_file_path.data, dest_file_path.data) == false)
|
|
{
|
|
copy_ok = false;
|
|
log_warning(_("unable to copy \"%s\" to \"%s\""),
|
|
arcdir_ent->d_name, runtime_options.data_dir);
|
|
}
|
|
else
|
|
{
|
|
unlink(src_file_path.data);
|
|
copied_count++;
|
|
}
|
|
|
|
termPQExpBuffer(&dest_file_path);
|
|
termPQExpBuffer(&src_file_path);
|
|
}
|
|
|
|
closedir(arcdir);
|
|
|
|
log_notice(_("%i files copied to %s"),
|
|
copied_count,
|
|
config_file_options.data_directory);
|
|
|
|
if (copy_ok == false)
|
|
{
|
|
log_warning(_("unable to copy all files from \"%s\""), archive_dir.data);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* Finally, delete directory - it should be empty unless it's been
|
|
* interfered with for some reason, in which case manual intervention is
|
|
* required
|
|
*/
|
|
if (rmdir(archive_dir.data) != 0 && errno != EEXIST)
|
|
{
|
|
log_warning(_("unable to delete directory \"%s\""), archive_dir.data);
|
|
log_detail("%s", strerror(errno));
|
|
log_hint(_("directory may need to be manually removed"));
|
|
}
|
|
else
|
|
{
|
|
log_verbose(LOG_INFO, "directory \"%s\" deleted", archive_dir.data);
|
|
}
|
|
}
|
|
|
|
termPQExpBuffer(&archive_dir);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
static void
|
|
format_archive_dir(PQExpBufferData *archive_dir)
|
|
{
|
|
appendPQExpBuffer(archive_dir,
|
|
"%s/repmgr-config-archive-%s",
|
|
runtime_options.config_archive_dir,
|
|
config_file_options.node_name);
|
|
|
|
log_verbose(LOG_DEBUG, "using archive directory \"%s\"", archive_dir->data);
|
|
}
|
|
|
|
|
|
/*
 * Copy the contents of "src_file" to "dest_file".
 *
 * The destination is created (or truncated) and its permissions are
 * restricted to owner read/write on a best-effort basis.
 *
 * Returns true only if the whole file was read and written successfully.
 * Returns false if either file cannot be opened, or if a read or write
 * error occurs (including an error flushing the data when the destination
 * is closed); in that case the destination may be incomplete.
 */
static bool
copy_file(const char *src_file, const char *dest_file)
{
	FILE	   *src = NULL;
	FILE	   *dest = NULL;
	char		buf[8192];
	size_t		nread = 0;
	bool		ok = true;

	src = fopen(src_file, "r");

	if (src == NULL)
		return false;

	dest = fopen(dest_file, "w");

	if (dest == NULL)
	{
		fclose(src);
		return false;
	}

	/* best effort only; failure to adjust permissions is not fatal */
	(void) chmod(dest_file, S_IRUSR | S_IWUSR);

	/*
	 * fread() returns 0 both at end-of-file and on error, so the loop
	 * always terminates; the two cases are told apart with ferror() below.
	 */
	while ((nread = fread(buf, 1, sizeof(buf), src)) > 0)
	{
		if (fwrite(buf, 1, nread, dest) != nread)
		{
			ok = false;
			break;
		}
	}

	if (ferror(src))
		ok = false;

	/* buffered data is flushed on close, so write errors may surface here */
	if (fclose(dest) != 0)
		ok = false;

	fclose(src);

	return ok;
}
|
|
|
|
|
|
static const char *
|
|
output_repmgrd_status(CheckStatus status)
|
|
{
|
|
switch (status)
|
|
{
|
|
case CHECK_STATUS_OK:
|
|
return "repmgrd running";
|
|
case CHECK_STATUS_WARNING:
|
|
return "repmgrd running but paused";
|
|
case CHECK_STATUS_CRITICAL:
|
|
return "repmgrd not running";
|
|
case CHECK_STATUS_UNKNOWN:
|
|
return "repmgrd status unknown";
|
|
}
|
|
|
|
return "UNKNOWN";
|
|
}
|
|
|
|
|
|
void
|
|
do_node_help(void)
|
|
{
|
|
print_help_header();
|
|
|
|
printf(_("Usage:\n"));
|
|
printf(_(" %s [OPTIONS] node status\n"), progname());
|
|
printf(_(" %s [OPTIONS] node check\n"), progname());
|
|
printf(_(" %s [OPTIONS] node rejoin\n"), progname());
|
|
printf(_(" %s [OPTIONS] node service\n"), progname());
|
|
puts("");
|
|
|
|
printf(_("NODE STATUS\n"));
|
|
puts("");
|
|
printf(_(" \"node status\" displays an overview of a node's basic information and replication status.\n"));
|
|
puts("");
|
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
|
puts("");
|
|
printf(_(" --csv emit output as CSV\n"));
|
|
puts("");
|
|
|
|
printf(_("NODE CHECK\n"));
|
|
puts("");
|
|
printf(_(" \"node check\" performs some health checks on a node from a replication perspective.\n"));
|
|
puts("");
|
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
|
puts("");
|
|
printf(_(" Connection options:\n"));
|
|
printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
|
|
puts("");
|
|
printf(_(" Output options:\n"));
|
|
printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
|
|
printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
|
|
puts("");
|
|
printf(_(" Following options check an individual status:\n"));
|
|
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
|
|
printf(_(" --downstream whether all downstream nodes are connected\n"));
|
|
printf(_(" --upstream whether the node is connected to its upstream\n"));
|
|
printf(_(" --replication-lag replication lag in seconds (standbys only)\n"));
|
|
printf(_(" --role check node has expected role\n"));
|
|
printf(_(" --slots check for inactive replication slots\n"));
|
|
printf(_(" --missing-slots check for missing replication slots\n"));
|
|
printf(_(" --repmgrd check if repmgrd is running\n"));
|
|
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
|
|
|
|
puts("");
|
|
|
|
printf(_("NODE REJOIN\n"));
|
|
puts("");
|
|
printf(_(" \"node rejoin\" enables a dormant (stopped) node to be rejoined to the replication cluster.\n"));
|
|
puts("");
|
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
|
puts("");
|
|
printf(_(" --dry-run check that the prerequisites are met for rejoining the node\n" \
|
|
" (including usability of \"pg_rewind\" if requested)\n"));
|
|
printf(_(" --force-rewind[=VALUE] execute \"pg_rewind\" if necessary\n"));
|
|
printf(_(" (PostgreSQL 9.4 - provide full \"pg_rewind\" path)\n"));
|
|
|
|
printf(_(" --config-files comma-separated list of configuration files to retain\n" \
|
|
" after executing \"pg_rewind\"\n"));
|
|
printf(_(" --config-archive-dir directory to temporarily store retained configuration files\n" \
|
|
" (default: /tmp)\n"));
|
|
printf(_(" -W, --no-wait don't wait for the node to rejoin cluster\n"));
|
|
puts("");
|
|
|
|
printf(_("NODE SERVICE\n"));
|
|
puts("");
|
|
printf(_(" \"node service\" executes a system service command to stop/start/restart/reload a node\n" \
|
|
" or optionally display which command would be executed\n"));
|
|
puts("");
|
|
printf(_(" Configuration file required, runs on local node only.\n"));
|
|
puts("");
|
|
printf(_(" --dry-run show what action would be performed, but don't execute it\n"));
|
|
printf(_(" --action action to perform (one of \"start\", \"stop\", \"restart\" or \"reload\")\n"));
|
|
printf(_(" --list-actions show what command would be performed for each action\n"));
|
|
printf(_(" --checkpoint issue a CHECKPOINT before stopping or restarting the node\n"));
|
|
printf(_(" -S, --superuser=USERNAME superuser to use, if repmgr user is not superuser\n"));
|
|
|
|
puts("");
|
|
|
|
printf(_("%s home page: <%s>\n"), "repmgr", REPMGR_URL);
|
|
}
|