removed atomic round-robin

This commit is contained in:
Lev Kokotov
2022-02-10 10:37:49 -08:00
parent c1476d29da
commit 22c6f13dc7
2 changed files with 44 additions and 33 deletions

View File

@@ -153,7 +153,7 @@ impl Client {
} }
/// Client loop. We handle all messages between the client and the database here. /// Client loop. We handle all messages between the client and the database here.
pub async fn handle(&mut self, pool: ConnectionPool) -> Result<(), Error> { pub async fn handle(&mut self, mut pool: ConnectionPool) -> Result<(), Error> {
// Special: cancelling existing running query // Special: cancelling existing running query
if self.cancel_mode { if self.cancel_mode {
let (process_id, secret_key, address, port) = { let (process_id, secret_key, address, port) = {

View File

@@ -9,20 +9,20 @@ use crate::server::Server;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::{ use std::sync::{
atomic::{AtomicUsize, Ordering}, // atomic::{AtomicUsize, Ordering},
Arc, Mutex, Arc, Mutex,
}; };
// Banlist: bad servers go in here. // Banlist: bad servers go in here.
pub type BanList = Arc<Mutex<Vec<HashMap<Address, NaiveDateTime>>>>; pub type BanList = Arc<Mutex<Vec<HashMap<Address, NaiveDateTime>>>>;
pub type Counter = Arc<AtomicUsize>; // pub type Counter = Arc<AtomicUsize>;
pub type ClientServerMap = Arc<Mutex<HashMap<(i32, i32), (i32, i32, String, String)>>>; pub type ClientServerMap = Arc<Mutex<HashMap<(i32, i32), (i32, i32, String, String)>>>;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct ConnectionPool { pub struct ConnectionPool {
databases: Vec<Vec<Pool<ServerPool>>>, databases: Vec<Vec<Pool<ServerPool>>>,
addresses: Vec<Vec<Address>>, addresses: Vec<Vec<Address>>,
round_robin: Counter, round_robin: usize,
banlist: BanList, banlist: BanList,
healthcheck_timeout: u64, healthcheck_timeout: u64,
ban_time: i64, ban_time: i64,
@@ -90,10 +90,13 @@ impl ConnectionPool {
banlist.push(HashMap::new()); banlist.push(HashMap::new());
} }
assert_eq!(shards.len(), addresses.len());
let address_len = addresses.len();
ConnectionPool { ConnectionPool {
databases: shards, databases: shards,
addresses: addresses, addresses: addresses,
round_robin: Arc::new(AtomicUsize::new(0)), round_robin: rand::random::<usize>() % address_len, // Start at a random replica
banlist: Arc::new(Mutex::new(banlist)), banlist: Arc::new(Mutex::new(banlist)),
healthcheck_timeout: config.general.healthcheck_timeout, healthcheck_timeout: config.general.healthcheck_timeout,
ban_time: config.general.ban_time, ban_time: config.general.ban_time,
@@ -103,7 +106,7 @@ impl ConnectionPool {
/// Get a connection from the pool. /// Get a connection from the pool.
pub async fn get( pub async fn get(
&self, &mut self,
shard: Option<usize>, shard: Option<usize>,
role: Option<Role>, role: Option<Role>,
) -> Result<(PooledConnection<'_, ServerPool>, Address), Error> { ) -> Result<(PooledConnection<'_, ServerPool>, Address), Error> {
@@ -115,40 +118,48 @@ impl ConnectionPool {
None => 0, // TODO: pick a shard at random None => 0, // TODO: pick a shard at random
}; };
let mut allowed_attempts = match role { let addresses = &self.addresses[shard];
// Primary-specific queries get one attempt, if the primary is down,
// nothing we should do about it I think. It's dangerous to retry // Make sure if a specific role is requested, it's available in the pool.
// write queries. match role {
Some(Role::Primary) => { Some(role) => {
// Make sure we have a primary in the pool configured. let role_count = addresses
let primary_present = self.addresses[shard]
.iter() .iter()
.filter(|&db| db.role == Role::Primary) .filter(|&db| db.role == Role::Primary)
.count(); .count();
// TODO: return this error to the client, so people don't have to look in if role_count == 0 {
// the logs to figure out what happened. println!(
if primary_present == 0 { ">> Error: Role '{:?}' requested, but none are configured.",
println!(">> Error: Primary requested but none are configured."); role
);
return Err(Error::AllServersDown); return Err(Error::AllServersDown);
} }
// Primary gets one attempt.
1
} }
// Any role should be present.
_ => (),
};
let mut allowed_attempts = match role {
// Primary-specific queries get one attempt, if the primary is down,
// nothing we should do about it I think. It's dangerous to retry
// write queries.
Some(Role::Primary) => 1,
// Replicas get to try as many times as there are replicas // Replicas get to try as many times as there are replicas
// and connections in the pool. // and connections in the pool.
_ => self.databases[shard].len() * self.pool_size as usize, _ => self.databases[shard].len() * self.pool_size as usize,
}; };
while allowed_attempts > 0 { while allowed_attempts > 0 {
// TODO: think about making this local, so multiple clients // Round-robin each client's queries.
// don't compete for the same round-robin integer. // If a client only sends one query and then disconnects, it doesn't matter
// Especially since we're going to be skipping (see role selection below). // which replica it'll go to.
let index = self.round_robin += 1;
self.round_robin.fetch_add(1, Ordering::SeqCst) % self.databases[shard].len(); let index = self.round_robin % addresses.len();
let address = self.addresses[shard][index].clone(); let address = &addresses[index];
// Make sure you're getting a primary or a replica // Make sure you're getting a primary or a replica
// as per request. // as per request.
@@ -158,14 +169,14 @@ impl ConnectionPool {
// we'll do our best to pick it, but if we only // we'll do our best to pick it, but if we only
// have one server in the cluster, it's probably only a primary // have one server in the cluster, it's probably only a primary
// (or only a replica), so the client will just get what we have. // (or only a replica), so the client will just get what we have.
if address.role != role && self.addresses[shard].len() > 1 { if address.role != role && addresses.len() > 1 {
continue; continue;
} }
} }
None => (), None => (),
}; };
if self.is_banned(&address, shard, role) { if self.is_banned(address, shard, role) {
continue; continue;
} }
@@ -177,13 +188,13 @@ impl ConnectionPool {
Ok(conn) => conn, Ok(conn) => conn,
Err(err) => { Err(err) => {
println!(">> Banning replica {}, error: {:?}", index, err); println!(">> Banning replica {}, error: {:?}", index, err);
self.ban(&address, shard); self.ban(address, shard);
continue; continue;
} }
}; };
if !with_health_check { if !with_health_check {
return Ok((conn, address)); return Ok((conn, address.clone()));
} }
// // Check if this server is alive with a health check // // Check if this server is alive with a health check
@@ -197,7 +208,7 @@ impl ConnectionPool {
{ {
// Check if health check succeeded // Check if health check succeeded
Ok(res) => match res { Ok(res) => match res {
Ok(_) => return Ok((conn, address)), Ok(_) => return Ok((conn, address.clone())),
Err(_) => { Err(_) => {
println!( println!(
">> Banning replica {} because of failed health check", ">> Banning replica {} because of failed health check",
@@ -206,7 +217,7 @@ impl ConnectionPool {
// Don't leave a bad connection in the pool. // Don't leave a bad connection in the pool.
server.mark_bad(); server.mark_bad();
self.ban(&address, shard); self.ban(address, shard);
continue; continue;
} }
}, },
@@ -219,7 +230,7 @@ impl ConnectionPool {
// Don't leave a bad connection in the pool. // Don't leave a bad connection in the pool.
server.mark_bad(); server.mark_bad();
self.ban(&address, shard); self.ban(address, shard);
continue; continue;
} }
} }