mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-27 18:56:30 +00:00
removed atomic round-robin
This commit is contained in:
@@ -153,7 +153,7 @@ impl Client {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Client loop. We handle all messages between the client and the database here.
|
/// Client loop. We handle all messages between the client and the database here.
|
||||||
pub async fn handle(&mut self, pool: ConnectionPool) -> Result<(), Error> {
|
pub async fn handle(&mut self, mut pool: ConnectionPool) -> Result<(), Error> {
|
||||||
// Special: cancelling existing running query
|
// Special: cancelling existing running query
|
||||||
if self.cancel_mode {
|
if self.cancel_mode {
|
||||||
let (process_id, secret_key, address, port) = {
|
let (process_id, secret_key, address, port) = {
|
||||||
|
|||||||
75
src/pool.rs
75
src/pool.rs
@@ -9,20 +9,20 @@ use crate::server::Server;
|
|||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::{
|
use std::sync::{
|
||||||
atomic::{AtomicUsize, Ordering},
|
// atomic::{AtomicUsize, Ordering},
|
||||||
Arc, Mutex,
|
Arc, Mutex,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Banlist: bad servers go in here.
|
// Banlist: bad servers go in here.
|
||||||
pub type BanList = Arc<Mutex<Vec<HashMap<Address, NaiveDateTime>>>>;
|
pub type BanList = Arc<Mutex<Vec<HashMap<Address, NaiveDateTime>>>>;
|
||||||
pub type Counter = Arc<AtomicUsize>;
|
// pub type Counter = Arc<AtomicUsize>;
|
||||||
pub type ClientServerMap = Arc<Mutex<HashMap<(i32, i32), (i32, i32, String, String)>>>;
|
pub type ClientServerMap = Arc<Mutex<HashMap<(i32, i32), (i32, i32, String, String)>>>;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct ConnectionPool {
|
pub struct ConnectionPool {
|
||||||
databases: Vec<Vec<Pool<ServerPool>>>,
|
databases: Vec<Vec<Pool<ServerPool>>>,
|
||||||
addresses: Vec<Vec<Address>>,
|
addresses: Vec<Vec<Address>>,
|
||||||
round_robin: Counter,
|
round_robin: usize,
|
||||||
banlist: BanList,
|
banlist: BanList,
|
||||||
healthcheck_timeout: u64,
|
healthcheck_timeout: u64,
|
||||||
ban_time: i64,
|
ban_time: i64,
|
||||||
@@ -90,10 +90,13 @@ impl ConnectionPool {
|
|||||||
banlist.push(HashMap::new());
|
banlist.push(HashMap::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert_eq!(shards.len(), addresses.len());
|
||||||
|
let address_len = addresses.len();
|
||||||
|
|
||||||
ConnectionPool {
|
ConnectionPool {
|
||||||
databases: shards,
|
databases: shards,
|
||||||
addresses: addresses,
|
addresses: addresses,
|
||||||
round_robin: Arc::new(AtomicUsize::new(0)),
|
round_robin: rand::random::<usize>() % address_len, // Start at a random replica
|
||||||
banlist: Arc::new(Mutex::new(banlist)),
|
banlist: Arc::new(Mutex::new(banlist)),
|
||||||
healthcheck_timeout: config.general.healthcheck_timeout,
|
healthcheck_timeout: config.general.healthcheck_timeout,
|
||||||
ban_time: config.general.ban_time,
|
ban_time: config.general.ban_time,
|
||||||
@@ -103,7 +106,7 @@ impl ConnectionPool {
|
|||||||
|
|
||||||
/// Get a connection from the pool.
|
/// Get a connection from the pool.
|
||||||
pub async fn get(
|
pub async fn get(
|
||||||
&self,
|
&mut self,
|
||||||
shard: Option<usize>,
|
shard: Option<usize>,
|
||||||
role: Option<Role>,
|
role: Option<Role>,
|
||||||
) -> Result<(PooledConnection<'_, ServerPool>, Address), Error> {
|
) -> Result<(PooledConnection<'_, ServerPool>, Address), Error> {
|
||||||
@@ -115,40 +118,48 @@ impl ConnectionPool {
|
|||||||
None => 0, // TODO: pick a shard at random
|
None => 0, // TODO: pick a shard at random
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut allowed_attempts = match role {
|
let addresses = &self.addresses[shard];
|
||||||
// Primary-specific queries get one attempt, if the primary is down,
|
|
||||||
// nothing we should do about it I think. It's dangerous to retry
|
// Make sure if a specific role is requested, it's available in the pool.
|
||||||
// write queries.
|
match role {
|
||||||
Some(Role::Primary) => {
|
Some(role) => {
|
||||||
// Make sure we have a primary in the pool configured.
|
let role_count = addresses
|
||||||
let primary_present = self.addresses[shard]
|
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|&db| db.role == Role::Primary)
|
.filter(|&db| db.role == Role::Primary)
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
// TODO: return this error to the client, so people don't have to look in
|
if role_count == 0 {
|
||||||
// the logs to figure out what happened.
|
println!(
|
||||||
if primary_present == 0 {
|
">> Error: Role '{:?}' requested, but none are configured.",
|
||||||
println!(">> Error: Primary requested but none are configured.");
|
role
|
||||||
|
);
|
||||||
|
|
||||||
return Err(Error::AllServersDown);
|
return Err(Error::AllServersDown);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Primary gets one attempt.
|
|
||||||
1
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Any role should be present.
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut allowed_attempts = match role {
|
||||||
|
// Primary-specific queries get one attempt, if the primary is down,
|
||||||
|
// nothing we should do about it I think. It's dangerous to retry
|
||||||
|
// write queries.
|
||||||
|
Some(Role::Primary) => 1,
|
||||||
|
|
||||||
// Replicas get to try as many times as there are replicas
|
// Replicas get to try as many times as there are replicas
|
||||||
// and connections in the pool.
|
// and connections in the pool.
|
||||||
_ => self.databases[shard].len() * self.pool_size as usize,
|
_ => self.databases[shard].len() * self.pool_size as usize,
|
||||||
};
|
};
|
||||||
|
|
||||||
while allowed_attempts > 0 {
|
while allowed_attempts > 0 {
|
||||||
// TODO: think about making this local, so multiple clients
|
// Round-robin each client's queries.
|
||||||
// don't compete for the same round-robin integer.
|
// If a client only sends one query and then disconnects, it doesn't matter
|
||||||
// Especially since we're going to be skipping (see role selection below).
|
// which replica it'll go to.
|
||||||
let index =
|
self.round_robin += 1;
|
||||||
self.round_robin.fetch_add(1, Ordering::SeqCst) % self.databases[shard].len();
|
let index = self.round_robin % addresses.len();
|
||||||
let address = self.addresses[shard][index].clone();
|
let address = &addresses[index];
|
||||||
|
|
||||||
// Make sure you're getting a primary or a replica
|
// Make sure you're getting a primary or a replica
|
||||||
// as per request.
|
// as per request.
|
||||||
@@ -158,14 +169,14 @@ impl ConnectionPool {
|
|||||||
// we'll do our best to pick it, but if we only
|
// we'll do our best to pick it, but if we only
|
||||||
// have one server in the cluster, it's probably only a primary
|
// have one server in the cluster, it's probably only a primary
|
||||||
// (or only a replica), so the client will just get what we have.
|
// (or only a replica), so the client will just get what we have.
|
||||||
if address.role != role && self.addresses[shard].len() > 1 {
|
if address.role != role && addresses.len() > 1 {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => (),
|
None => (),
|
||||||
};
|
};
|
||||||
|
|
||||||
if self.is_banned(&address, shard, role) {
|
if self.is_banned(address, shard, role) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -177,13 +188,13 @@ impl ConnectionPool {
|
|||||||
Ok(conn) => conn,
|
Ok(conn) => conn,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
println!(">> Banning replica {}, error: {:?}", index, err);
|
println!(">> Banning replica {}, error: {:?}", index, err);
|
||||||
self.ban(&address, shard);
|
self.ban(address, shard);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if !with_health_check {
|
if !with_health_check {
|
||||||
return Ok((conn, address));
|
return Ok((conn, address.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// // Check if this server is alive with a health check
|
// // Check if this server is alive with a health check
|
||||||
@@ -197,7 +208,7 @@ impl ConnectionPool {
|
|||||||
{
|
{
|
||||||
// Check if health check succeeded
|
// Check if health check succeeded
|
||||||
Ok(res) => match res {
|
Ok(res) => match res {
|
||||||
Ok(_) => return Ok((conn, address)),
|
Ok(_) => return Ok((conn, address.clone())),
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
println!(
|
println!(
|
||||||
">> Banning replica {} because of failed health check",
|
">> Banning replica {} because of failed health check",
|
||||||
@@ -206,7 +217,7 @@ impl ConnectionPool {
|
|||||||
// Don't leave a bad connection in the pool.
|
// Don't leave a bad connection in the pool.
|
||||||
server.mark_bad();
|
server.mark_bad();
|
||||||
|
|
||||||
self.ban(&address, shard);
|
self.ban(address, shard);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -219,7 +230,7 @@ impl ConnectionPool {
|
|||||||
// Don't leave a bad connection in the pool.
|
// Don't leave a bad connection in the pool.
|
||||||
server.mark_bad();
|
server.mark_bad();
|
||||||
|
|
||||||
self.ban(&address, shard);
|
self.ban(address, shard);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user