From 5d2efe8634392e37ee4cea09bd565fb25b8c8d86 Mon Sep 17 00:00:00 2001 From: Ian Barwick Date: Wed, 1 Mar 2017 14:26:39 +0900 Subject: [PATCH] repmgr: improve logging of rsync actions In particular, copy_remote_files() would report any kind of non-zero exit status from rsync as an error, even though when cloning data directories and tablespaces we explicitly ignore the "vanished files" status (code 24) as it's expected behaviour for files in these locations to disappear during the rsync copy process. --- HISTORY | 3 ++- repmgr.c | 48 ++++++++++++++++++++++++++---------------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/HISTORY b/HISTORY index b6e69b25..2c04f9ee 100644 --- a/HISTORY +++ b/HISTORY @@ -2,13 +2,14 @@ repmgr: support --wal-method (replacing --xlog-method) for pg_basebackup in PostgreSQL 10 -3.3.1 2017-02- +3.3.1 2017-03- repmgrd: prevent invalid apply lag value being written to the monitoring table (Ian) repmgrd: fix error in XLogRecPtr conversion when calculating monitoring statistics (Ian) repmgr: if replication slots in use, where possible delete slot on old upstream node after following new upstream (Ian) + repmgr: improve logging of rsync actions (Ian) 3.3 2016-12-27 repmgr: always log to STDERR even if log facility defined (Ian) diff --git a/repmgr.c b/repmgr.c index 9c7afd98..c9361a92 100644 --- a/repmgr.c +++ b/repmgr.c @@ -3670,15 +3670,16 @@ do_standby_clone(void) master_data_directory, local_data_directory, true, server_version_num); /* - Exit code 0 means no error, but we want to ignore exit code 24 as well - as rsync returns that code on "Partial transfer due to vanished source files". - It's quite common for this to happen on the data directory, particularly - with long running rsync on a busy server. - */ - if (!WIFEXITED(r) && WEXITSTATUS(r) != 24) + * Exit code 0 means no error, but we want to ignore exit code 24 as well + * as rsync returns that code on "Partial transfer due to vanished source files". + * It's quite common for this to happen on the data directory, particularly + * with long running rsync on a busy server. + */ + if (WIFEXITED(r) && WEXITSTATUS(r) && WEXITSTATUS(r) != 24) { - log_warning(_("standby clone: failed copying master data directory '%s'\n"), + log_err(_("standby clone: failed copying master data directory '%s'\n"), master_data_directory); + r = retval = ERR_BAD_RSYNC; goto stop_backup; } @@ -3762,15 +3763,16 @@ do_standby_clone(void) true, server_version_num); /* - Exit code 0 means no error, but we want to ignore exit code 24 as well - as rsync returns that code on "Partial transfer due to vanished source files". - It's quite common for this to happen on the data directory, particularly - with long running rsync on a busy server. - */ - if (!WIFEXITED(r) && WEXITSTATUS(r) != 24) + * Exit code 0 means no error, but we want to ignore exit code 24 as well + * as rsync returns that code on "Partial transfer due to vanished source files". + * It's quite common for this to happen on the data directory, particularly + * with long running rsync on a busy server. + */ + if (WIFEXITED(r) && WEXITSTATUS(r) && WEXITSTATUS(r) != 24) { - log_warning(_("standby clone: failed copying tablespace directory '%s'\n"), - cell_t->location); + log_err(_("standby clone: failed copying tablespace directory '%s'\n"), + cell_t->location); + r = retval = ERR_BAD_RSYNC; goto stop_backup; } } @@ -3778,7 +3780,7 @@ do_standby_clone(void) /* * If a valid mapping was provide for this tablespace, arrange for it to * be remapped - * (if no tablespace mappings was provided, the link will be copied as-is + * (if no tablespace mapping was provided, the link will be copied as-is * by pg_basebackup or rsync and no action is required) */ if (mapping_found == true || mode == barman) @@ -3937,7 +3939,7 @@ do_standby_clone(void) r = copy_remote_files(runtime_options.host, runtime_options.remote_user, file->filepath, dest_path, false, server_version_num); - if (r != 0) + if (WEXITSTATUS(r)) { log_err(_("standby clone: unable to copy config file '%s'\n"), file->filename); @@ -3971,7 +3973,7 @@ do_standby_clone(void) r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_control_file, local_control_file, false, server_version_num); - if (r != 0) + if (WEXITSTATUS(r)) { log_warning(_("standby clone: failed copying master control file '%s'\n"), master_control_file); @@ -6194,7 +6196,7 @@ do_witness_create(void) r = copy_remote_files(runtime_options.host, runtime_options.remote_user, master_hba_file, runtime_options.dest_dir, false, -1); - if (r != 0) + if (WEXITSTATUS(r)) { char *errmsg = _("Unable to copy pg_hba.conf from master"); log_err("%s\n", errmsg); @@ -6960,9 +6962,11 @@ copy_remote_files(char *host, char *remote_user, char *remote_path, r = system(script); - if (r != 0) - log_err(_("unable to rsync from remote host (%s:%s)\n"), - host_string, remote_path); + log_debug("copy_remote_files(): r = %i; WIFEXITED: %i; WEXITSTATUS: %i\n", r, WIFEXITED(r), WEXITSTATUS(r)); + + /* exit code 24 indicates vanished files, which isn't a problem for us */ + if (WEXITSTATUS(r) && WEXITSTATUS(r) != 24) + log_verbose(LOG_WARNING, "copy_remote_files(): rsync returned unexpected exit status %i \n", WEXITSTATUS(r)); return r; }