Delete any replication slots copied by pg_rewind

If --force-rewind is used in conjunction with "repmgr node rejoin",
any replication slots present on the source node will be copied too;
it's essential to remove these to prevent stale slots being extant
when the node starts up.

We do this at file system level *before* the server starts to minimize
the risk of any problems.

Addresses GitHub #334
This commit is contained in:
Ian Barwick
2017-11-24 11:11:24 +09:00
parent e0560c3e70
commit c20475f94a
3 changed files with 67 additions and 6 deletions

View File

@@ -311,6 +311,14 @@ create_pg_dir(char *path, bool force)
return true;
}
int
rmdir_recursive(char *path)
{
return nftw(path, unlink_dir_callback, 64, FTW_DEPTH | FTW_PHYS);
}
static int
unlink_dir_callback(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)
{

View File

@@ -26,5 +26,5 @@ extern int check_dir(char *path);
extern bool create_dir(char *path);
extern bool is_pg_dir(char *path);
extern bool create_pg_dir(char *path, bool force);
extern int rmdir_recursive(char *path);
#endif

View File

@@ -1769,19 +1769,72 @@ do_node_rejoin(void)
log_detail("%s", strerror(errno));
}
}
termPQExpBuffer(&filebuf);
/* delete any replication slots copied in by pg_rewind */
{
PQExpBufferData slotdir_path;
DIR *slotdir;
struct dirent *slotdir_ent;
initPQExpBuffer(&slotdir_path);
appendPQExpBuffer(&slotdir_path,
"%s/pg_replslot",
config_file_options.data_directory);
slotdir = opendir(slotdir_path.data);
if (slotdir == NULL)
{
log_warning(_("unable to open replication slot directory \"%s\""),
slotdir_path.data);
log_detail("%s", strerror(errno));
}
else
{
while ((slotdir_ent = readdir(slotdir)) != NULL) {
struct stat statbuf;
PQExpBufferData slotdir_ent_path;
if(strcmp(slotdir_ent->d_name, ".") == 0 || strcmp(slotdir_ent->d_name, "..") == 0)
continue;
initPQExpBuffer(&slotdir_ent_path);
appendPQExpBuffer(&slotdir_ent_path,
"%s/%s",
slotdir_path.data,
slotdir_ent->d_name);
if (stat(slotdir_ent_path.data, &statbuf) == 0 && !S_ISDIR(statbuf.st_mode))
{
termPQExpBuffer(&slotdir_ent_path);
continue;
}
log_debug("deleting slot directory \"%s\"", slotdir_ent_path.data);
if (rmdir_recursive(slotdir_ent_path.data) != 0 && errno != EEXIST)
{
log_warning(_("unable to delete replication slot directory \"%s\""), slotdir_ent_path.data);
log_detail("%s", strerror(errno));
log_hint(_("directory may need to be manually removed"));
}
termPQExpBuffer(&slotdir_ent_path);
}
}
termPQExpBuffer(&slotdir_path);
}
}
initPQExpBuffer(&follow_output);
success = do_standby_follow_internal(
upstream_conn,
success = do_standby_follow_internal(upstream_conn,
&primary_node_record,
&follow_output);
create_event_notification(
upstream_conn,
create_event_notification(upstream_conn,
&config_file_options,
config_file_options.node_id,
"node_rejoin",