From 863a3fab530daf21ebb1d4e7789a983ec854d120 Mon Sep 17 00:00:00 2001 From: Jaime Casanova Date: Fri, 1 Oct 2010 02:26:39 -0500 Subject: [PATCH] A few changes from repmgrd and improve the SQL of the repl_status view in order to actually show something useful --- repmgr.sql | 22 +++++++++++++++++----- repmgrd.c | 34 +++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/repmgr.sql b/repmgr.sql index e7d48741..d625f441 100644 --- a/repmgr.sql +++ b/repmgr.sql @@ -26,11 +26,23 @@ CREATE TABLE repl_monitor ( /* - * A useful view + * This view shows the latest monitor info about every node. + * Interesting things to see: + * replication_lag: in bytes (this is how far the latest xlog record + * we have received is from master) + * apply_lag: in bytes (this is how far the latest xlog record + * we have applied is from the latest record we + * have received) + * time_lag: how far behind master we are in time, as an interval (age of the latest monitor record) */ drop view if exists repl_status; CREATE VIEW repl_status AS -SELECT *, now() - (select max(last_monitor_time) from repl_monitor b - where b.primary_node = a.primary_node - and b.standby_node = a.standby_node) - FROM repl_monitor a; +WITH monitor_info AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY primary_node, standby_node + ORDER BY last_monitor_time desc) + FROM repl_monitor) +SELECT primary_node, standby_node, last_monitor_time, last_wal_primary_location, + last_wal_standby_location, pg_size_pretty(replication_lag) replication_lag, + pg_size_pretty(apply_lag) apply_lag, + age(now(), last_monitor_time) AS time_lag + FROM monitor_info a + WHERE row_number = 1; diff --git a/repmgrd.c b/repmgrd.c index f76a4c09..ad89f28d 100644 --- a/repmgrd.c +++ b/repmgrd.c @@ -33,14 +33,14 @@ bool verbose = false; static void help(const char *progname); -void checkClusterConfiguration(void); -void checkNodeConfiguration(char *conninfo); -void getPrimaryConnection(void); +static void checkClusterConfiguration(void);
+static void checkNodeConfiguration(char *conninfo); +static void getPrimaryConnection(void); -void MonitorCheck(void); -void MonitorExecute(void); +static void MonitorCheck(void); +static void MonitorExecute(void); -unsigned long long int walLocationToBytes(char *wal_location); +static unsigned long long int walLocationToBytes(char *wal_location); int @@ -111,6 +111,13 @@ main(int argc, char **argv) * and start monitor */ myLocalMode = is_standby(myLocalConn) ? STANDBY_MODE : PRIMARY_MODE; + if (myLocalMode == PRIMARY_MODE) + { + primaryId = myLocalId; + strcpy(primaryConninfo, conninfo); + primaryConn = myLocalConn; + } + checkClusterConfiguration(); checkNodeConfiguration(conninfo); if (myLocalMode == STANDBY_MODE) @@ -132,9 +139,7 @@ main(int argc, char **argv) * This function ask if we are in recovery, if false we are the primary else * we are a standby */ - - -void +static void getPrimaryConnection(void) { PGresult *res1; @@ -199,7 +204,7 @@ getPrimaryConnection(void) } -void +static void MonitorCheck(void) { /* * Every 3 seconds, insert monitor info @@ -215,8 +220,7 @@ MonitorCheck(void) { /* * Check if its time for next monitor call and if so, do it. */ - -void +static void MonitorExecute(void) { PGresult *res; @@ -291,7 +295,7 @@ MonitorExecute(void) } -void +static void checkClusterConfiguration(void) { PGresult *res; @@ -324,7 +328,7 @@ checkClusterConfiguration(void) } -void +static void checkNodeConfiguration(char *conninfo) { PGresult *res; @@ -372,7 +376,7 @@ checkNodeConfiguration(char *conninfo) } -unsigned long long int +static unsigned long long int walLocationToBytes(char *wal_location) { unsigned int xlogid;