Add --data-directory-config option to "repmgr node check"

Implements part of GitHub #523.
This commit is contained in:
Ian Barwick
2019-01-16 11:21:51 +09:00
parent 4523137bfc
commit 0b3a310802
11 changed files with 216 additions and 41 deletions

View File

@@ -2,6 +2,7 @@
repmgr: add --version-number command line option (Ian)
repmgr: add --terse option to "cluster show"; GitHub #521 (Ian)
repmgr: add --dry-run option to "standby promote"; GitHub #522 (Ian)
repmgr: add "node check --data-directory-config"; GitHub #523 (Ian)
repmgr: "standby switchover": improve handling of connection URIs when
executing "node rejoin" on the demotion candidate; GitHub #525 (Ian)
repmgrd: check binary and extension major versions match; GitHub #515 (Ian)

View File

@@ -198,9 +198,9 @@ mkdir_p(char *path, mode_t omode)
bool
is_pg_dir(char *path)
is_pg_dir(const char *path)
{
char dirpath[MAXPGPATH];
char dirpath[MAXPGPATH] = "";
struct stat sb;
/* test pgdata */

View File

@@ -39,7 +39,7 @@ extern bool set_dir_permissions(char *path);
extern DataDirState check_dir(char *path);
extern bool create_dir(char *path);
extern bool is_pg_dir(char *path);
extern bool is_pg_dir(const char *path);
extern PgDirState is_pg_running(char *path);
extern bool create_pg_dir(char *path, bool force);
extern int rmdir_recursive(char *path);

View File

@@ -50,6 +50,13 @@
</para>
</listitem>
<listitem>
<para>
<link linkend="repmgr-node-check"><command>repmgr node check --data-directory-config</command></link>
option added; this is to confirm &repmgr; is correctly configured.
</para>
</listitem>
</itemizedlist>
</para>
</sect2>

View File

@@ -87,44 +87,44 @@
<varlistentry>
<term><option>--csv</option></term>
<listitem>
<para>
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
outputs the replication cluster's status in a simple CSV format, suitable for
parsing by scripts, e.g.:
<programlisting>
<para>
<command>repmgr cluster show</command> accepts an optional parameter <literal>--csv</literal>, which
outputs the replication cluster's status in a simple CSV format, suitable for
parsing by scripts, e.g.:
<programlisting>
$ repmgr -f /etc/repmgr.conf cluster show --csv
1,-1,-1
2,0,0
3,0,1</programlisting>
</para>
<para>
The columns have the following meanings:
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
node ID
</simpara>
</listitem>
<listitem>
<simpara>
</para>
<para>
The columns have the following meanings:
<itemizedlist spacing="compact" mark="bullet">
<listitem>
<simpara>
node ID
</simpara>
</listitem>
<listitem>
<simpara>
availability (0 = available, -1 = unavailable)
</simpara>
</listitem>
<listitem>
<simpara>
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
</simpara>
</listitem>
</itemizedlist>
</para>
</listitem>
</varlistentry>
</simpara>
</listitem>
<listitem>
<simpara>
recovery state (0 = not in recovery, 1 = in recovery, -1 = unknown)
</simpara>
</listitem>
</itemizedlist>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--terse</option></term>
<listitem>
<para>
Suppress display of the <literal>conninfo</literal> column.
Suppress display of the <literal>conninfo</literal> column.
</para>
</listitem>
</varlistentry>

View File

@@ -86,6 +86,16 @@
</simpara>
</listitem>
<listitem>
<simpara>
<literal>--data-directory-config</literal>: checks that the data directory configured in
<filename>repmgr.conf</filename> matches the actual data directory.
This check is not directly related to replication, but is useful to verify &repmgr;
is correctly configured.
</simpara>
</listitem>
</itemizedlist>
</para>
</refsect1>
@@ -105,6 +115,7 @@
<listitem>
<simpara>
<literal>--nagios</literal>: generate output in a Nagios-compatible format
(for individual checks only)
</simpara>
</listitem>
</itemizedlist>

View File

@@ -48,7 +48,7 @@ static CheckStatus do_node_check_replication_lag(PGconn *conn, OutputMode mode,
static CheckStatus do_node_check_role(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
static CheckStatus do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output);
/*
* NODE STATUS
*
@@ -82,6 +82,9 @@ do_node_status(void)
int server_version_num = UNKNOWN_SERVER_VERSION_NUM;
char server_version_str[MAXVERSIONSTR] = "";
/*
* A database connection is *not* required for this check
*/
if (runtime_options.is_shutdown_cleanly == true)
{
return _do_node_status_is_shutdown_cleanly();
@@ -653,7 +656,8 @@ _do_node_status_is_shutdown_cleanly(void)
node_status = NODE_STATUS_DOWN;
}
log_verbose(LOG_DEBUG, "node status determined as: %s", print_node_status(node_status));
log_verbose(LOG_DEBUG, "node status determined as: %s",
print_node_status(node_status));
switch (node_status)
{
@@ -789,6 +793,16 @@ do_node_check(void)
exit(return_code);
}
if (runtime_options.data_directory_config == true)
{
return_code = do_node_check_data_directory(conn,
runtime_options.output_mode,
&node_info,
NULL);
PQfinish(conn);
exit(return_code);
}
if (runtime_options.output_mode == OM_NAGIOS)
{
@@ -821,6 +835,9 @@ do_node_check(void)
if (do_node_check_missing_slots(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
issue_detected = true;
if (do_node_check_data_directory(conn, runtime_options.output_mode, &node_info, &status_list) != CHECK_STATUS_OK)
issue_detected = true;
if (runtime_options.output_mode == OM_CSV)
{
appendPQExpBuffer(&output,
@@ -1442,11 +1459,9 @@ do_node_check_replication_lag(PGconn *conn, OutputMode mode, t_node_info *node_i
switch (mode)
{
case OM_OPTFORMAT:
{
printf("--status=%s %s\n",
output_check_status(status),
details.data);
}
printf("--status=%s %s\n",
output_check_status(status),
details.data);
break;
case OM_NAGIOS:
printf("REPMGR_REPLICATION_LAG %s: %s\n",
@@ -1797,6 +1812,135 @@ do_node_check_missing_slots(PGconn *conn, OutputMode mode, t_node_info *node_inf
}
/*
 * do_node_check_data_directory()
 *
 * Check that the "data_directory" configured in repmgr.conf corresponds to
 * the data directory actually in use.
 *
 * With a superuser connection the configured value is compared against the
 * server's "data_directory" setting. Otherwise we can only verify that the
 * configured directory looks like a PostgreSQL data directory.
 *
 * Parameters:
 *   conn        - database connection for the local node
 *   mode        - output format to emit the result in
 *   node_info   - not used by this check; present so the signature matches
 *                 the other do_node_check_*() functions
 *   list_output - if not NULL, the result is added to this list (CSV/text
 *                 modes) instead of being printed directly
 *
 * Returns CHECK_STATUS_OK, CHECK_STATUS_UNKNOWN (the server setting could not
 * be retrieved) or CHECK_STATUS_CRITICAL (mismatch, or not a data directory).
 *
 * Closes the connection and exits with ERR_BAD_CONFIG if CSV output is
 * requested for an individual check (i.e. "list_output" is NULL).
 */
CheckStatus
do_node_check_data_directory(PGconn *conn, OutputMode mode, t_node_info *node_info, CheckStatusList *list_output)
{
	CheckStatus status = CHECK_STATUS_OK;
	char		actual_data_directory[MAXPGPATH] = "";
	PQExpBufferData details;

	if (mode == OM_CSV && list_output == NULL)
	{
		log_error(_("--csv output not provided with --data-directory-config option"));
		PQfinish(conn);
		exit(ERR_BAD_CONFIG);
	}

	initPQExpBuffer(&details);

	/*
	 * Check actual data directory matches that in repmgr.conf; note this requires
	 * a superuser connection
	 */
	if (is_superuser_connection(conn, NULL) == true)
	{
		/* we expect to have a database connection */
		if (get_pg_setting(conn, "data_directory", actual_data_directory) == false)
		{
			/*
			 * Without the server's value there is nothing to compare against;
			 * report "unknown" rather than a spurious mismatch with "".
			 */
			appendPQExpBuffer(&details,
							  _("unable to determine current \"data_directory\""));
			status = CHECK_STATUS_UNKNOWN;
		}
		else if (strncmp(actual_data_directory, config_file_options.data_directory, MAXPGPATH) != 0)
		{
			/* Nagios output already contains the configured directory */
			if (mode != OM_NAGIOS)
			{
				appendPQExpBuffer(&details,
								  _("configured \"data_directory\" is \"%s\"; "),
								  config_file_options.data_directory);
			}

			appendPQExpBuffer(&details,
							  "actual data directory is \"%s\"",
							  actual_data_directory);

			status = CHECK_STATUS_CRITICAL;
		}
		else
		{
			appendPQExpBuffer(&details,
							  _("configured \"data_directory\" is \"%s\""),
							  config_file_options.data_directory);
		}
	}
	else
	{
		/*
		 * If no superuser connection available, sanity-check that the
		 * configured directory looks like a PostgreSQL directory and hope
		 * it's the right one.
		 */
		if (mode == OM_TEXT)
		{
			log_info(_("connection is not a superuser connection, falling back to simple check"));

			/* XXX add -S/--superuser option */
			if (PQserverVersion(conn) >= 100000)
			{
				log_hint(_("add the \"%s\" user to group \"pg_read_all_settings\""),
						 PQuser(conn));
			}
		}

		if (is_pg_dir(config_file_options.data_directory) == false)
		{
			if (mode == OM_NAGIOS)
			{
				appendPQExpBufferStr(&details,
									 _("configured \"data_directory\" is not a PostgreSQL data directory"));
			}
			else
			{
				/*
				 * "actual_data_directory" is never populated in this branch,
				 * so report the configured path which was actually tested.
				 */
				appendPQExpBuffer(&details,
								  _("configured \"data_directory\" \"%s\" is not a PostgreSQL data directory"),
								  config_file_options.data_directory);
			}

			status = CHECK_STATUS_CRITICAL;
		}
		else
		{
			/* ensure text/list output is not left with empty details */
			appendPQExpBuffer(&details,
							  _("configured \"data_directory\" is \"%s\""),
							  config_file_options.data_directory);
		}
	}

	switch (mode)
	{
		case OM_OPTFORMAT:
			printf("--configured-data-directory=%s\n",
				   output_check_status(status));
			break;

		case OM_NAGIOS:
			printf("REPMGR_DATA_DIRECTORY %s: %s",
				   output_check_status(status),
				   config_file_options.data_directory);

			/* append the explanation for any non-OK result */
			if (status != CHECK_STATUS_OK)
			{
				printf(" | %s", details.data);
			}
			puts("");
			break;

		case OM_CSV:
		case OM_TEXT:
			if (list_output != NULL)
			{
				check_status_list_set(list_output,
									  "Configured data directory",
									  status,
									  details.data);
			}
			else
			{
				printf("%s (%s)\n",
					   output_check_status(status),
					   details.data);
			}
			break;

		default:
			break;
	}

	termPQExpBuffer(&details);

	return status;
}
void
do_node_service(void)
@@ -2924,8 +3068,8 @@ do_node_help(void)
puts("");
printf(_(" Configuration file required, runs on local node only.\n"));
puts("");
printf(_(" --csv emit output as CSV\n"));
printf(_(" --nagios emit output in Nagios format (individual status output only)\n"));
printf(_(" --csv emit output as CSV (not available for individual check output)\n"));
printf(_(" --nagios emit output in Nagios format (individual check output only)\n"));
puts("");
printf(_(" Following options check an individual status:\n"));
printf(_(" --archive-ready number of WAL files ready for archiving\n"));
@@ -2934,6 +3078,7 @@ do_node_help(void)
printf(_(" --role check node has expected role\n"));
printf(_(" --slots check for inactive replication slots\n"));
printf(_(" --missing-slots check for missing replication slots\n"));
printf(_(" --data-directory-config check repmgr's data directory configuration\n"));
puts("");

View File

@@ -3472,6 +3472,10 @@ do_standby_switchover(void)
}
termPQExpBuffer(&command_output);
/* check remote repmgr has the data directory correctly configured */
// - add repmgr node check --data-directory
/*
* populate local node record with current state of various replication-related
* values, so we can check for sufficient walsenders and replication slots

View File

@@ -111,6 +111,7 @@ typedef struct
bool missing_slots;
bool has_passfile;
bool replication_connection;
bool data_directory_config;
/* "node rejoin" options */
char config_files[MAXLEN];
@@ -161,7 +162,7 @@ typedef struct
/* "node status" options */ \
false, \
/* "node check" options */ \
false, false, false, false, false, false, false, false, \
false, false, false, false, false, false, false, false, false, \
/* "node rejoin" options */ \
"", \
/* "node service" options */ \

View File

@@ -497,6 +497,10 @@ main(int argc, char **argv)
runtime_options.replication_connection = true;
break;
case OPT_DATA_DIRECTORY_CONFIG:
runtime_options.data_directory_config = true;
break;
/*--------------------
* "node rejoin" options
*--------------------

View File

@@ -93,6 +93,7 @@
#define OPT_MISSING_SLOTS 1041
#define OPT_REPMGRD_NO_PAUSE 1042
#define OPT_VERSION_NUMBER 1043
#define OPT_DATA_DIRECTORY_CONFIG 1044
/* deprecated since 3.3 */
@@ -177,6 +178,7 @@ static struct option long_options[] =
{"missing-slots", no_argument, NULL, OPT_MISSING_SLOTS},
{"has-passfile", no_argument, NULL, OPT_HAS_PASSFILE},
{"replication-connection", no_argument, NULL, OPT_REPL_CONN},
{"data-directory-config", no_argument, NULL, OPT_DATA_DIRECTORY_CONFIG},
/* "node rejoin" options */
{"config-files", required_argument, NULL, OPT_CONFIG_FILES},