Files
pgcat/src/client.rs

445 lines
16 KiB
Rust
Raw Normal View History

2022-02-04 09:28:52 -08:00
/// Implementation of the PostgreSQL client.
/// We are pretending to be the server in this scenario,
/// and this module implements that.
use bytes::{Buf, BufMut, BytesMut};
2022-02-08 13:11:50 -08:00
use regex::Regex;
2022-02-05 10:02:13 -08:00
use tokio::io::{AsyncReadExt, BufReader};
2022-02-03 15:17:04 -08:00
use tokio::net::tcp::{OwnedReadHalf, OwnedWriteHalf};
2022-02-03 13:35:40 -08:00
use tokio::net::TcpStream;
2022-02-09 20:02:20 -08:00
use crate::config::Role;
2022-02-03 13:35:40 -08:00
use crate::errors::Error;
use crate::messages::*;
2022-02-05 18:20:53 -08:00
use crate::pool::{ClientServerMap, ConnectionPool};
2022-02-04 16:01:35 -08:00
use crate::server::Server;
2022-02-08 13:11:50 -08:00
use crate::sharding::Sharder;
/// Pattern for the custom shard-selection command. It is matched against the
/// upper-cased text of a simple-protocol query; the quoted decimal number is
/// the sharding key.
const SHARDING_REGEX: &str = r"SET SHARDING KEY TO '[0-9]+';";

/// Pattern for the custom server-role command. It is matched against the
/// upper-cased text of a simple-protocol query; the quoted word selects the
/// primary or a replica.
const ROLE_REGEX: &str = r"SET SERVER ROLE TO '(PRIMARY|REPLICA)';";
2022-02-03 13:35:40 -08:00
/// The client state. One of these is created per client.
2022-02-03 13:35:40 -08:00
pub struct Client {
// The reads are buffered (8K by default).
2022-02-03 13:54:07 -08:00
read: BufReader<OwnedReadHalf>,
// We buffer the writes ourselves because we know the protocol
// better than a stock buffer.
2022-02-03 13:54:07 -08:00
write: OwnedWriteHalf,
// Internal buffer, where we place messages until we have to flush
// them to the backend.
2022-02-03 15:33:26 -08:00
buffer: BytesMut,
// The client was started with the sole reason to cancel another running query.
2022-02-04 09:28:52 -08:00
cancel_mode: bool,
// In transaction mode, the connection is released after each transaction.
// Session mode has slightly higher throughput per client, but lower capacity.
2022-02-05 15:23:21 -08:00
transaction_mode: bool,
// For query cancellation, the client is given a random process ID and secret on startup.
2022-02-04 09:28:52 -08:00
process_id: i32,
secret_key: i32,
// Clients are mapped to servers while they use them. This allows a client
// to connect and cancel a query.
2022-02-04 16:01:35 -08:00
client_server_map: ClientServerMap,
2022-02-08 13:11:50 -08:00
// sharding regex
sharding_regex: Regex,
2022-02-09 20:02:20 -08:00
// role detection regex
role_regex: Regex,
2022-02-03 13:35:40 -08:00
}
impl Client {
2022-02-04 09:28:52 -08:00
/// Given a TCP socket, trick the client into thinking we are
/// the Postgres server. Perform the authentication and place
/// the client in query-ready mode.
2022-02-04 16:01:35 -08:00
pub async fn startup(
mut stream: TcpStream,
client_server_map: ClientServerMap,
2022-02-08 09:25:59 -08:00
transaction_mode: bool,
2022-02-04 16:01:35 -08:00
) -> Result<Client, Error> {
2022-02-08 13:11:50 -08:00
let sharding_regex = Regex::new(SHARDING_REGEX).unwrap();
2022-02-09 20:02:20 -08:00
let role_regex = Regex::new(ROLE_REGEX).unwrap();
2022-02-08 13:11:50 -08:00
2022-02-03 13:35:40 -08:00
loop {
// Could be StartupMessage or SSLRequest
// which makes this variable length.
let len = match stream.read_i32().await {
Ok(len) => len,
Err(_) => return Err(Error::ClientBadStartup),
};
// Read whatever is left.
let mut startup = vec![0u8; len as usize - 4];
match stream.read_exact(&mut startup).await {
Ok(_) => (),
Err(_) => return Err(Error::ClientBadStartup),
};
let mut bytes = BytesMut::from(&startup[..]);
let code = bytes.get_i32();
match code {
// Client wants SSL. We don't support it at the moment.
80877103 => {
let mut no = BytesMut::with_capacity(1);
no.put_u8(b'N');
write_all(&mut stream, no).await?;
2022-02-03 15:17:04 -08:00
}
2022-02-03 13:35:40 -08:00
// Regular startup message.
196608 => {
// TODO: perform actual auth.
// TODO: record startup parameters client sends over.
2022-02-04 09:28:52 -08:00
// Generate random backend ID and secret key
let process_id: i32 = rand::random();
let secret_key: i32 = rand::random();
2022-02-03 13:35:40 -08:00
auth_ok(&mut stream).await?;
2022-02-03 18:02:50 -08:00
server_parameters(&mut stream).await?;
2022-02-04 09:28:52 -08:00
backend_key_data(&mut stream, process_id, secret_key).await?;
2022-02-03 13:35:40 -08:00
ready_for_query(&mut stream).await?;
// Split the read and write streams
// so we can control buffering.
2022-02-03 13:54:07 -08:00
let (read, write) = stream.into_split();
2022-02-03 13:35:40 -08:00
return Ok(Client {
2022-02-03 13:54:07 -08:00
read: BufReader::new(read),
write: write,
2022-02-03 15:33:26 -08:00
buffer: BytesMut::with_capacity(8196),
2022-02-04 09:28:52 -08:00
cancel_mode: false,
2022-02-08 09:25:59 -08:00
transaction_mode: transaction_mode,
2022-02-04 09:28:52 -08:00
process_id: process_id,
secret_key: secret_key,
2022-02-04 16:01:35 -08:00
client_server_map: client_server_map,
2022-02-08 13:11:50 -08:00
sharding_regex: sharding_regex,
2022-02-09 20:02:20 -08:00
role_regex: role_regex,
2022-02-04 09:28:52 -08:00
});
}
// Query cancel request.
2022-02-04 09:28:52 -08:00
80877102 => {
let (read, write) = stream.into_split();
let process_id = bytes.get_i32();
let secret_key = bytes.get_i32();
return Ok(Client {
read: BufReader::new(read),
write: write,
buffer: BytesMut::with_capacity(8196),
cancel_mode: true,
2022-02-08 09:25:59 -08:00
transaction_mode: transaction_mode,
2022-02-04 09:28:52 -08:00
process_id: process_id,
secret_key: secret_key,
2022-02-04 16:01:35 -08:00
client_server_map: client_server_map,
2022-02-08 13:11:50 -08:00
sharding_regex: sharding_regex,
2022-02-09 20:02:20 -08:00
role_regex: role_regex,
2022-02-03 13:35:40 -08:00
});
2022-02-03 15:17:04 -08:00
}
2022-02-03 13:35:40 -08:00
_ => {
return Err(Error::ProtocolSyncError);
}
};
}
}
2022-02-03 13:54:07 -08:00
2022-02-04 09:28:52 -08:00
/// Client loop. We handle all messages between the client and the database here.
2022-02-05 18:20:53 -08:00
pub async fn handle(&mut self, pool: ConnectionPool) -> Result<(), Error> {
2022-02-04 09:28:52 -08:00
// Special: cancelling existing running query
if self.cancel_mode {
2022-02-05 10:02:13 -08:00
let (process_id, secret_key, address, port) = {
2022-02-04 16:01:35 -08:00
let guard = self.client_server_map.lock().unwrap();
match guard.get(&(self.process_id, self.secret_key)) {
// Drop the mutex as soon as possible.
2022-02-05 10:02:13 -08:00
Some((process_id, secret_key, address, port)) => (
process_id.clone(),
secret_key.clone(),
address.clone(),
port.clone(),
),
2022-02-04 16:01:35 -08:00
None => return Ok(()),
}
};
// TODO: pass actual server host and port somewhere.
2022-02-05 10:02:13 -08:00
return Ok(Server::cancel(&address, &port, process_id, secret_key).await?);
2022-02-04 09:28:52 -08:00
}
2022-02-08 13:11:50 -08:00
// Active shard we're talking to.
// The lifetime of this depends on the pool mode:
2022-02-09 06:51:31 -08:00
// - if in session mode, this lives until the client disconnects,
2022-02-08 13:11:50 -08:00
// - if in transaction mode, this lives for the duration of one transaction.
let mut shard: Option<usize> = None;
2022-02-09 20:02:20 -08:00
let mut role: Option<Role> = None;
2022-02-03 13:54:07 -08:00
loop {
// Read a complete message from the client, which normally would be
// either a `Q` (query) or `P` (prepare, extended protocol).
// We can parse it here before grabbing a server from the pool,
// in case the client is sending some control messages, e.g.
2022-02-09 06:51:31 -08:00
// SET SHARDING KEY TO 'bigint';
let mut message = read_message(&mut self.read).await?;
2022-02-08 13:11:50 -08:00
// Parse for special select shard command.
// SET SHARDING KEY TO 'bigint';
2022-02-09 20:02:20 -08:00
match self.select_shard(message.clone(), pool.shards()) {
2022-02-08 13:11:50 -08:00
Some(s) => {
2022-02-09 20:02:20 -08:00
custom_protocol_response_ok(&mut self.write, "SET SHARDING KEY").await?;
2022-02-08 13:11:50 -08:00
shard = Some(s);
continue;
}
None => (),
};
2022-02-09 20:02:20 -08:00
// Parse for special server role selection command.
//
match self.select_role(message.clone()) {
Some(r) => {
custom_protocol_response_ok(&mut self.write, "SET SERVER ROLE").await?;
role = Some(r);
continue;
}
None => (),
};
// Grab a server from the pool.
// None = any shard
2022-02-09 20:02:20 -08:00
let connection = pool.get(shard, role).await.unwrap();
let mut proxy = connection.0;
let _address = connection.1;
2022-02-03 16:25:05 -08:00
let server = &mut *proxy;
2022-02-04 16:08:18 -08:00
// Claim this server as mine for query cancellation.
2022-02-04 16:01:35 -08:00
server.claim(self.process_id, self.secret_key);
2022-02-03 16:25:05 -08:00
loop {
// No messages in the buffer, read one.
let mut message = if message.len() == 0 {
match read_message(&mut self.read).await {
Ok(message) => message,
Err(err) => {
// Client disconnected without warning.
if server.in_transaction() {
// TODO: this is what PgBouncer does
// which leads to connection thrashing.
//
// I think we could issue a ROLLBACK here instead.
// server.mark_bad();
server.query("ROLLBACK; DISCARD ALL;").await?;
2022-02-05 18:20:53 -08:00
}
return Err(err);
}
}
} else {
let msg = message.clone();
message.clear();
msg
};
2022-02-03 16:25:05 -08:00
let original = message.clone(); // To be forwarded to the server
let code = message.get_u8() as char;
let _len = message.get_i32() as usize;
match code {
'Q' => {
server.send(original).await?;
2022-02-04 08:26:50 -08:00
loop {
let response = server.recv().await?;
match write_all_half(&mut self.write, response).await {
Ok(_) => (),
Err(err) => {
server.mark_bad();
return Err(err);
}
};
if !server.is_data_available() {
break;
}
2022-02-04 08:26:50 -08:00
}
2022-02-03 16:25:05 -08:00
// Release server
2022-02-05 15:23:21 -08:00
if !server.in_transaction() && self.transaction_mode {
2022-02-08 13:11:50 -08:00
shard = None;
2022-02-09 20:02:20 -08:00
role = None;
2022-02-03 16:25:05 -08:00
break;
}
}
'X' => {
// Client closing. Rollback and clean up
// connection before releasing into the pool.
// Pgbouncer closes the connection which leads to
// connection thrashing when clients misbehave.
// This pool will protect the database. :salute:
2022-02-03 18:02:50 -08:00
if server.in_transaction() {
server.query("ROLLBACK; DISCARD ALL;").await?;
2022-02-03 18:02:50 -08:00
}
2022-02-03 16:25:05 -08:00
return Ok(());
}
'P' => {
// Extended protocol, let's buffer most of it
self.buffer.put(&original[..]);
}
'B' => {
self.buffer.put(&original[..]);
}
// Describe
2022-02-03 16:25:05 -08:00
'D' => {
self.buffer.put(&original[..]);
}
'E' => {
self.buffer.put(&original[..]);
}
'S' => {
// Extended protocol, client requests sync
self.buffer.put(&original[..]);
server.send(self.buffer.clone()).await?;
self.buffer.clear();
2022-02-04 08:26:50 -08:00
loop {
let response = server.recv().await?;
match write_all_half(&mut self.write, response).await {
Ok(_) => (),
Err(err) => {
server.mark_bad();
return Err(err);
}
};
if !server.is_data_available() {
break;
}
2022-02-04 08:26:50 -08:00
}
2022-02-03 16:25:05 -08:00
// Release server
2022-02-05 15:23:21 -08:00
if !server.in_transaction() && self.transaction_mode {
2022-02-08 13:11:50 -08:00
shard = None;
2022-02-09 20:02:20 -08:00
role = None;
2022-02-03 16:25:05 -08:00
break;
}
}
2022-02-04 08:06:45 -08:00
// CopyData
'd' => {
// Forward the data to the server,
// don't buffer it since it can be rather large.
server.send(original).await?;
}
'c' | 'f' => {
// Copy is done.
server.send(original).await?;
let response = server.recv().await?;
match write_all_half(&mut self.write, response).await {
Ok(_) => (),
Err(err) => {
server.mark_bad();
return Err(err);
}
};
2022-02-05 15:23:21 -08:00
// Release the server
if !server.in_transaction() && self.transaction_mode {
println!("Releasing after copy done");
2022-02-08 13:11:50 -08:00
shard = None;
2022-02-09 20:02:20 -08:00
role = None;
2022-02-05 15:23:21 -08:00
break;
}
2022-02-04 08:06:45 -08:00
}
2022-02-03 16:25:05 -08:00
_ => {
println!(">>> Unexpected code: {}", code);
}
2022-02-03 15:17:04 -08:00
}
2022-02-03 13:54:07 -08:00
}
2022-02-04 16:01:35 -08:00
self.release();
2022-02-03 13:54:07 -08:00
}
}
2022-02-04 16:01:35 -08:00
2022-02-04 16:08:18 -08:00
/// Release the server from being mine. I can't cancel its queries anymore.
2022-02-04 16:01:35 -08:00
pub fn release(&mut self) {
let mut guard = self.client_server_map.lock().unwrap();
guard.remove(&(self.process_id, self.secret_key));
}
2022-02-08 13:11:50 -08:00
2022-02-09 06:51:31 -08:00
/// Determine if the query is part of our special syntax, extract
/// the shard key, and return the shard to query based on Postgres'
/// PARTITION BY HASH function.
2022-02-09 20:02:20 -08:00
fn select_shard(&mut self, mut buf: BytesMut, shards: usize) -> Option<usize> {
2022-02-08 13:11:50 -08:00
let code = buf.get_u8() as char;
2022-02-09 06:51:31 -08:00
// Only supporting simpe protocol here, so
// one would have to execute something like this:
// psql -c "SET SHARDING KEY TO '1234'"
// after sanitizing the value manually, which can be just done with an
// int parser, e.g. `let key = "1234".parse::<i64>().unwrap()`.
2022-02-08 13:11:50 -08:00
match code {
'Q' => (),
_ => return None,
};
let len = buf.get_i32();
let query = String::from_utf8_lossy(&buf[..len as usize - 4 - 1]).to_ascii_uppercase(); // Don't read the ternminating null
if self.sharding_regex.is_match(&query) {
let shard = query.split("'").collect::<Vec<&str>>()[1];
match shard.parse::<i64>() {
Ok(shard) => {
let sharder = Sharder::new(shards);
Some(sharder.pg_bigint_hash(shard))
}
Err(_) => None,
}
} else {
None
}
}
2022-02-09 20:02:20 -08:00
/// Pick a primary or a replica from the pool.
///
/// `buf` is a complete client message (code byte, i32 length, payload).
///
/// Returns `None` if the message is not a simple query (`Q`), is malformed
/// or truncated, or does not contain the server-role command.
fn select_role(&mut self, mut buf: BytesMut) -> Option<Role> {
    // A message needs at least the code byte and the i32 length.
    if buf.len() < 5 {
        return None;
    }

    // Only the simple protocol is supported, same story as select_shard().
    if buf.get_u8() != b'Q' {
        return None;
    }

    // The length counts itself (4 bytes); the payload ends with a NUL
    // terminator that we don't want in the query text.
    let len = buf.get_i32();
    if len < 5 {
        return None;
    }
    let text_len = len as usize - 4 - 1;

    // `get` instead of indexing: a length larger than the buffer must not panic.
    let query = String::from_utf8_lossy(buf.get(..text_len)?).to_ascii_uppercase();

    // Take the role from the matched command itself, not from the first
    // quoted literal that happens to appear anywhere in the query.
    let command = self.role_regex.find(&query)?;

    match command.as_str().split('\'').nth(1)? {
        "PRIMARY" => Some(Role::Primary),
        "REPLICA" => Some(Role::Replica),
        _ => None,
    }
}
2022-02-03 15:17:04 -08:00
}