mirror of
https://github.com/EnterpriseDB/repmgr.git
synced 2026-03-22 22:56:29 +00:00
daemon (start|stop): verify that repmgrd starts/stops.
Note this may not always be possible for "daemon stop" if we are unable to determine the repmgrd PID.
This commit is contained in:
@@ -23,7 +23,11 @@
|
||||
This command starts the <application>repmgrd</application> daemon on the
|
||||
local node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||
started. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -65,6 +69,33 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||
started successfully.
|
||||
</para>
|
||||
<para>
|
||||
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||
</para>
|
||||
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't wait to confirm that <application>repmgrd</application>
|
||||
started successfully.
|
||||
</para>
|
||||
<para>
|
||||
This is equivalent to providing <option>--wait=0</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
@@ -79,7 +110,12 @@
|
||||
<term><option>SUCCESS (0)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could be started.
|
||||
The <application>repmgrd</application> start command (defined in
|
||||
<varname>repmgrd_service_start_command</varname>) was successfully executed.
|
||||
</para>
|
||||
<para>
|
||||
If the <option>--wait</option> option was provided, &repmgr; will confirm that
|
||||
<application>repmgrd</application> has actually started up.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -94,11 +130,33 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_DB_CONN (6)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
&repmgr; was unable to connect to the local PostgreSQL node.
|
||||
</para>
|
||||
<para>
|
||||
PostgreSQL must be running before <application>repmgrd</application>
|
||||
can be started. Additionally, unless the <option>--no-wait</option> option was
|
||||
provided, &repmgr; needs to be able to connect to the local PostgreSQL node
|
||||
to determine the state of <application>repmgrd</application>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
|
||||
<varlistentry>
|
||||
<term><option>ERR_REPMGRD_SERVICE (27)</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
<application>repmgrd</application> could not be started.
|
||||
The <application>repmgrd</application> start command (defined in
|
||||
<varname>repmgrd_service_start_command</varname>) was not successfully executed.
|
||||
</para>
|
||||
<para>
|
||||
This can also mean that &repmgr; was unable to confirm whether <application>repmgrd</application>
|
||||
successfully started (unless the <option>--no-wait</option> option was provided).
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
@@ -109,7 +167,7 @@
|
||||
<refsect1>
|
||||
<title>See also</title>
|
||||
<para>
|
||||
<xref linkend="repmgr-daemon-stop">, <xref linkend="repmgr-daemon-status">
|
||||
<xref linkend="repmgr-daemon-stop">, <xref linkend="repmgr-daemon-status">, <xref linkend="repmgrd-daemon">
|
||||
</para>
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -24,6 +24,18 @@
|
||||
local node.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
By default, &repmgr; will wait for up to 15 seconds to confirm that <application>repmgrd</application>
|
||||
stopped. This behaviour can be overridden by specifying a different value using the <option>--wait</option>
|
||||
option, or disabled altogether with the <option>--no-wait</option> option.
|
||||
</para>
|
||||
<note>
|
||||
<para>
|
||||
If PostgreSQL is not running on the local node, under some circumstances &repmgr; may not
|
||||
be able to confirm if <application>repmgrd</application> has actually stopped.
|
||||
</para>
|
||||
</note>
|
||||
|
||||
</refsect1>
|
||||
|
||||
<refsect1>
|
||||
@@ -65,6 +77,32 @@
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>-w/--wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Wait for the specified number of seconds to confirm that <application>repmgrd</application>
|
||||
stopped successfully.
|
||||
</para>
|
||||
<para>
|
||||
Note that providing <option>--wait=0</option> is the equivalent of <option>--no-wait</option>.
|
||||
</para>
|
||||
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
||||
<varlistentry>
|
||||
<term><option>--no-wait</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Don't wait to confirm that <application>repmgrd</application>
|
||||
stopped successfully.
|
||||
</para>
|
||||
<para>
|
||||
This is equivalent to providing <option>--wait=0</option>.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
</variablelist>
|
||||
</refsect1>
|
||||
|
||||
|
||||
@@ -348,7 +348,7 @@
|
||||
|
||||
</sect1>
|
||||
|
||||
<sect1 id="repmgrd-daemon">
|
||||
<sect1 id="repmgrd-daemon" xreflabel="repmgrd daemon">
|
||||
<indexterm>
|
||||
<primary>repmgrd</primary>
|
||||
<secondary>starting and stopping</secondary>
|
||||
|
||||
@@ -25,7 +25,9 @@
|
||||
#include "repmgr-client-global.h"
|
||||
#include "repmgr-action-daemon.h"
|
||||
|
||||
|
||||
#define REPMGR_DAEMON_STOP_START_WAIT 15
|
||||
#define REPMGR_DAEMON_STATUS_START_HINT _("use \"repmgr daemon status\" to confirm that repmgrd was successfully started")
|
||||
#define REPMGR_DAEMON_STATUS_STOP_HINT _("use \"repmgr daemon status\" to confirm that repmgrd was successfully stopped")
|
||||
|
||||
/*
|
||||
* Possibly also show:
|
||||
@@ -410,7 +412,7 @@ do_daemon_start(void)
|
||||
/* TODO: if PostgreSQL is not available, have repmgrd loop and retry connection */
|
||||
log_error(_("unable to connect to local node"));
|
||||
log_detail(_("PostgreSQL must be running before \"repmgrd\" can be started"));
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -421,7 +423,17 @@ do_daemon_start(void)
|
||||
|
||||
if (is_repmgrd_running(conn) == true)
|
||||
{
|
||||
pid_t pid = UNKNOWN_PID;
|
||||
|
||||
log_error(_("repmgrd appears to be running already"));
|
||||
|
||||
pid = repmgrd_get_pid(conn);
|
||||
|
||||
if (pid != UNKNOWN_PID)
|
||||
log_detail(_("repmgrd PID is %i"), pid);
|
||||
else
|
||||
log_warning(_("unable to determine repmgrd PID"));
|
||||
|
||||
PQfinish(conn);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
@@ -457,6 +469,52 @@ do_daemon_start(void)
|
||||
}
|
||||
|
||||
termPQExpBuffer(&output_buf);
|
||||
|
||||
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||
{
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0;
|
||||
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||
|
||||
if (runtime_options.wait_provided)
|
||||
timeout = runtime_options.wait;
|
||||
|
||||
conn = establish_db_connection(config_file_options.conninfo, false);
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
log_notice(_("unable to connect to local node"));
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
exit(ERR_DB_CONN);
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (is_repmgrd_running(conn) == true)
|
||||
{
|
||||
log_notice(_("repmgrd was successfully started"));
|
||||
PQfinish(conn);
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == timeout)
|
||||
{
|
||||
PQfinish(conn);
|
||||
log_error(_("repmgrd does not appear to have started after %i seconds"),
|
||||
timeout);
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd is running",
|
||||
i, runtime_options.wait);
|
||||
sleep(1);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -468,7 +526,7 @@ void do_daemon_stop(void)
|
||||
bool success;
|
||||
pid_t pid = UNKNOWN_PID;
|
||||
|
||||
if (config_file_options.repmgrd_service_start_command[0] == '\0')
|
||||
if (config_file_options.repmgrd_service_stop_command[0] == '\0')
|
||||
{
|
||||
log_error(_("\"repmgrd_service_stop_command\" is not set"));
|
||||
log_hint(_("set \"repmgrd_service_stop_command\" in \"repmgr.conf\""));
|
||||
@@ -485,6 +543,9 @@ void do_daemon_stop(void)
|
||||
|
||||
if (PQstatus(conn) != CONNECTION_OK)
|
||||
{
|
||||
/*
|
||||
* a PostgreSQL connection is not required to stop repmgrd, so only emit a warning here
|
||||
*/
|
||||
log_warning(_("unable to connect to local node"));
|
||||
}
|
||||
else
|
||||
@@ -534,6 +595,66 @@ void do_daemon_stop(void)
|
||||
}
|
||||
|
||||
termPQExpBuffer(&output_buf);
|
||||
|
||||
if (runtime_options.no_wait == true || runtime_options.wait == 0)
|
||||
{
|
||||
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i = 0;
|
||||
int timeout = REPMGR_DAEMON_STOP_START_WAIT;
|
||||
/*
|
||||
*
|
||||
*/
|
||||
if (pid == UNKNOWN_PID)
|
||||
{
|
||||
/*
|
||||
* XXX attempt to get pidfile from config
|
||||
* and get contents
|
||||
* ( see check_and_create_pid_file() )
|
||||
* if PID still unknown, exit here
|
||||
*/
|
||||
log_warning(_("unable to determine repmgrd PID"));
|
||||
log_hint(REPMGR_DAEMON_STATUS_STOP_HINT);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
if (runtime_options.wait_provided)
|
||||
timeout = runtime_options.wait;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (kill(pid, 0) == -1)
|
||||
{
|
||||
if (errno == ESRCH)
|
||||
{
|
||||
log_notice(_("repmgrd was successfully stopped"));
|
||||
exit(SUCCESS);
|
||||
}
|
||||
else
|
||||
{
|
||||
log_error(_("unable to determine status of process with PID %i"), pid);
|
||||
log_detail("%s", strerror(errno));
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (i == timeout)
|
||||
{
|
||||
log_error(_("repmgrd does not appear to have stopped after %i seconds"),
|
||||
timeout);
|
||||
log_hint(REPMGR_DAEMON_STATUS_START_HINT);
|
||||
exit(ERR_REPMGRD_SERVICE);
|
||||
}
|
||||
|
||||
log_debug("sleeping 1 second; %i of %i attempts to determine if repmgrd with PID %i is running",
|
||||
i, timeout, pid);
|
||||
sleep(1);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -559,16 +680,20 @@ void do_daemon_help(void)
|
||||
|
||||
printf(_("DAEMON START\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon start\" attempts to start repmgrd"));
|
||||
printf(_(" \"daemon start\" attempts to start repmgrd\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check prerequisites but don't start repmgrd\n"));
|
||||
printf(_(" -w/--wait wait for repmgrd to start (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||
printf(_(" --no-wait don't wait for repmgrd to start\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON STOP\n"));
|
||||
puts("");
|
||||
printf(_(" \"daemon stop\" attempts to stop repmgrd"));
|
||||
printf(_(" \"daemon stop\" attempts to stop repmgrd\n"));
|
||||
puts("");
|
||||
printf(_(" --dry-run check prerequisites but don't stop repmgrd\n"));
|
||||
printf(_(" -w/--wait wait for repmgrd to stop (default: %i seconds)\n"), REPMGR_DAEMON_STOP_START_WAIT);
|
||||
printf(_(" --no-wait don't wait for repmgrd to stop\n"));
|
||||
puts("");
|
||||
|
||||
printf(_("DAEMON PAUSE\n"));
|
||||
|
||||
@@ -2298,7 +2298,7 @@ do_standby_follow(void)
|
||||
NULL);
|
||||
}
|
||||
|
||||
if (PQstatus(follow_target_conn) == CONNECTION_OK || runtime_options.wait == false)
|
||||
if (PQstatus(follow_target_conn) == CONNECTION_OK || runtime_options.wait_provided == false)
|
||||
{
|
||||
break;
|
||||
}
|
||||
@@ -2317,7 +2317,7 @@ do_standby_follow(void)
|
||||
log_error(_("unable to connect to target node %i"), follow_target_node_id);
|
||||
}
|
||||
|
||||
if (runtime_options.wait == true)
|
||||
if (runtime_options.wait_provided == true)
|
||||
{
|
||||
if (follow_target_node_id == UNKNOWN_NODE_ID)
|
||||
{
|
||||
|
||||
@@ -35,13 +35,14 @@ typedef struct
|
||||
bool connection_param_provided;
|
||||
bool host_param_provided;
|
||||
bool limit_provided;
|
||||
bool wait_provided;
|
||||
|
||||
/* general configuration options */
|
||||
char config_file[MAXPGPATH];
|
||||
bool dry_run;
|
||||
bool force;
|
||||
char pg_bindir[MAXLEN]; /* overrides setting in repmgr.conf */
|
||||
bool wait;
|
||||
int wait;
|
||||
bool no_wait;
|
||||
|
||||
/* logging options */
|
||||
@@ -137,9 +138,9 @@ typedef struct
|
||||
|
||||
#define T_RUNTIME_OPTIONS_INITIALIZER { \
|
||||
/* configuration metadata */ \
|
||||
false, false, false, false, \
|
||||
false, false, false, false, false, \
|
||||
/* general configuration options */ \
|
||||
"", false, false, "", false, false, \
|
||||
"", false, false, "", -1, false, \
|
||||
/* logging options */ \
|
||||
"", false, false, false, false, \
|
||||
/* output options */ \
|
||||
|
||||
@@ -254,7 +254,11 @@ main(int argc, char **argv)
|
||||
|
||||
/* -w/--wait */
|
||||
case 'w':
|
||||
runtime_options.wait = true;
|
||||
runtime_options.wait_provided = true;
|
||||
if (optarg != NULL)
|
||||
{
|
||||
runtime_options.wait = repmgr_atoi(optarg, "--wait", &cli_errors, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
/* -W/--no-wait */
|
||||
@@ -1713,17 +1717,19 @@ check_cli_parameters(const int action)
|
||||
|
||||
/* --wait/--no-wait */
|
||||
|
||||
if (runtime_options.wait == true && runtime_options.no_wait == true)
|
||||
if (runtime_options.wait_provided == true && runtime_options.no_wait == true)
|
||||
{
|
||||
item_list_append_format(&cli_errors,
|
||||
_("both --wait and --no-wait options provided"));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (runtime_options.wait)
|
||||
if (runtime_options.wait_provided)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case DAEMON_START:
|
||||
case DAEMON_STOP:
|
||||
case STANDBY_FOLLOW:
|
||||
break;
|
||||
default:
|
||||
@@ -1736,6 +1742,8 @@ check_cli_parameters(const int action)
|
||||
{
|
||||
switch (action)
|
||||
{
|
||||
case DAEMON_START:
|
||||
case DAEMON_STOP:
|
||||
case NODE_REJOIN:
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -116,7 +116,7 @@ static struct option long_options[] =
|
||||
{"dry-run", no_argument, NULL, OPT_DRY_RUN},
|
||||
{"force", no_argument, NULL, 'F'},
|
||||
{"pg_bindir", required_argument, NULL, 'b'},
|
||||
{"wait", no_argument, NULL, 'w'},
|
||||
{"wait", optional_argument, NULL, 'w'},
|
||||
{"no-wait", no_argument, NULL, 'W'},
|
||||
|
||||
/* connection options */
|
||||
|
||||
Reference in New Issue
Block a user