mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-23 01:16:30 +00:00
Health check delay (#118)
* initial commit of server check delay implementation * fmt * spelling * Update name to last_healthcheck and some comments * Moved server tested stat to after require_healthcheck check * Make health check delay configurable * Rename to last_activity * Fix typo * Add debug log for healthcheck * Add address to debug log
This commit is contained in:
36
src/pool.rs
36
src/pool.rs
@@ -251,7 +251,7 @@ impl ConnectionPool {
|
||||
|
||||
/// Get a connection from the pool.
|
||||
pub async fn get(
|
||||
&mut self,
|
||||
&self,
|
||||
shard: usize, // shard number
|
||||
role: Option<Role>, // primary or replica
|
||||
process_id: i32, // client id
|
||||
@@ -283,6 +283,9 @@ impl ConnectionPool {
|
||||
return Err(Error::BadConfig);
|
||||
}
|
||||
|
||||
let healthcheck_timeout = get_config().general.healthcheck_timeout;
|
||||
let healthcheck_delay = get_config().general.healthcheck_delay as u128;
|
||||
|
||||
while allowed_attempts > 0 {
|
||||
// Round-robin replicas.
|
||||
round_robin += 1;
|
||||
@@ -312,7 +315,7 @@ impl ConnectionPool {
|
||||
Ok(conn) => conn,
|
||||
Err(err) => {
|
||||
error!("Banning replica {}, error: {:?}", index, err);
|
||||
self.ban(address, shard);
|
||||
self.ban(address, shard, process_id);
|
||||
self.stats.client_disconnecting(process_id, address.id);
|
||||
self.stats
|
||||
.checkout_time(now.elapsed().as_micros(), process_id, address.id);
|
||||
@@ -322,8 +325,19 @@ impl ConnectionPool {
|
||||
|
||||
// // Check if this server is alive with a health check.
|
||||
let server = &mut *conn;
|
||||
let healthcheck_timeout = get_config().general.healthcheck_timeout;
|
||||
|
||||
// Will return error if timestamp is greater than current system time, which it should never be set to
|
||||
let require_healthcheck =
|
||||
server.last_activity().elapsed().unwrap().as_millis() > healthcheck_delay;
|
||||
|
||||
if !require_healthcheck {
|
||||
self.stats
|
||||
.checkout_time(now.elapsed().as_micros(), process_id, address.id);
|
||||
self.stats.server_idle(conn.process_id(), address.id);
|
||||
return Ok((conn, address.clone()));
|
||||
}
|
||||
|
||||
debug!("Running health check for replica {}, {:?}", index, address);
|
||||
self.stats.server_tested(server.process_id(), address.id);
|
||||
|
||||
match tokio::time::timeout(
|
||||
@@ -348,10 +362,7 @@ impl ConnectionPool {
|
||||
// Don't leave a bad connection in the pool.
|
||||
server.mark_bad();
|
||||
|
||||
self.ban(address, shard);
|
||||
self.stats.client_disconnecting(process_id, address.id);
|
||||
self.stats
|
||||
.checkout_time(now.elapsed().as_micros(), process_id, address.id);
|
||||
self.ban(address, shard, process_id);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
@@ -362,10 +373,7 @@ impl ConnectionPool {
|
||||
// Don't leave a bad connection in the pool.
|
||||
server.mark_bad();
|
||||
|
||||
self.ban(address, shard);
|
||||
self.stats.client_disconnecting(process_id, address.id);
|
||||
self.stats
|
||||
.checkout_time(now.elapsed().as_micros(), process_id, address.id);
|
||||
self.ban(address, shard, process_id);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -377,7 +385,11 @@ impl ConnectionPool {
|
||||
/// Ban an address (i.e. replica). It no longer will serve
|
||||
/// traffic for any new transactions. Existing transactions on that replica
|
||||
/// will finish successfully or error out to the clients.
|
||||
pub fn ban(&self, address: &Address, shard: usize) {
|
||||
pub fn ban(&self, address: &Address, shard: usize, process_id: i32) {
|
||||
self.stats.client_disconnecting(process_id, address.id);
|
||||
self.stats
|
||||
.checkout_time(Instant::now().elapsed().as_micros(), process_id, address.id);
|
||||
|
||||
error!("Banning {:?}", address);
|
||||
let now = chrono::offset::Utc::now().naive_utc();
|
||||
let mut guard = self.banlist.write();
|
||||
|
||||
Reference in New Issue
Block a user