mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-23 01:16:30 +00:00
Allow shard setting with comments (#293)
What: Allows the client to select a shard via a comment in the statement, e.g. /* shard_id: 1 */ select * from foo; Why: We're using a setup in Ruby that makes it tough or impossible to inject commands on the connection to set the shard before it gets to the "real" SQL being run. Instead, we have an updated PG adapter that allows injection of comments before each executed SQL statement. We need this support in pgcat in order to keep some complex shard-picking logic in Ruby code while using pgcat for connection management. Local testing: Run postgres and pgcat with the default options. Run psql < tests/sharding/query_routing_setup.sql to set up the database for the tests, then run ./tests/pgbench/external_shard_test.sh as often as needed to exercise the shard-setting comment test.
This commit is contained in:
@@ -85,6 +85,12 @@ query_parser_enabled = true
|
||||
# queries. The primary can always be explicitly selected with our custom protocol.
|
||||
primary_reads_enabled = true
|
||||
|
||||
# Allow sharding commands to be passed as statement comments instead of
|
||||
# separate commands. If these are unset this functionality is disabled.
|
||||
# sharding_key_regex = '/\* sharding_key: (\d+) \*/'
|
||||
# shard_id_regex = '/\* shard_id: (\d+) \*/'
|
||||
# regex_search_limit = 1000 # only search the first 1000 bytes of each SQL statement
|
||||
|
||||
# So what if you wanted to implement a different hashing function,
|
||||
# or you've already built one and you want this pooler to use it?
|
||||
#
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
use arc_swap::ArcSwap;
|
||||
use log::{error, info};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
use std::hash::Hash;
|
||||
@@ -342,8 +343,15 @@ pub struct Pool {
|
||||
#[serde(default = "Pool::default_automatic_sharding_key")]
|
||||
pub automatic_sharding_key: Option<String>,
|
||||
|
||||
pub sharding_key_regex: Option<String>,
|
||||
pub shard_id_regex: Option<String>,
|
||||
pub regex_search_limit: Option<usize>,
|
||||
|
||||
pub shards: BTreeMap<String, Shard>,
|
||||
pub users: BTreeMap<String, User>,
|
||||
// Note, don't put simple fields below these configs. There's a compatibility issue with TOML that makes it
|
||||
// incompatible to have simple fields in TOML after complex objects. See
|
||||
// https://users.rust-lang.org/t/why-toml-to-string-get-error-valueaftertable/85903
|
||||
}
|
||||
|
||||
impl Pool {
|
||||
@@ -387,6 +395,18 @@ impl Pool {
|
||||
shard.validate()?;
|
||||
}
|
||||
|
||||
for (option, name) in [
|
||||
(&self.shard_id_regex, "shard_id_regex"),
|
||||
(&self.sharding_key_regex, "sharding_key_regex"),
|
||||
] {
|
||||
if let Some(regex) = option {
|
||||
if let Err(parse_err) = Regex::new(regex.as_str()) {
|
||||
error!("{} is not a valid Regex: {}", name, parse_err);
|
||||
return Err(Error::BadConfig);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -405,6 +425,9 @@ impl Default for Pool {
|
||||
automatic_sharding_key: None,
|
||||
connect_timeout: None,
|
||||
idle_timeout: None,
|
||||
sharding_key_regex: None,
|
||||
shard_id_regex: None,
|
||||
regex_search_limit: Some(1000),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
22
src/pool.rs
22
src/pool.rs
@@ -8,6 +8,7 @@ use once_cell::sync::Lazy;
|
||||
use parking_lot::{Mutex, RwLock};
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::thread_rng;
|
||||
use regex::Regex;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::{
|
||||
atomic::{AtomicBool, Ordering},
|
||||
@@ -104,6 +105,15 @@ pub struct PoolSettings {
|
||||
|
||||
// Ban time
|
||||
pub ban_time: i64,
|
||||
|
||||
// Regex for searching for the sharding key in SQL statements
|
||||
pub sharding_key_regex: Option<Regex>,
|
||||
|
||||
// Regex for searching for the shard id in SQL statements
|
||||
pub shard_id_regex: Option<Regex>,
|
||||
|
||||
// Limit how much of each query is searched for a potential shard regex match
|
||||
pub regex_search_limit: usize,
|
||||
}
|
||||
|
||||
impl Default for PoolSettings {
|
||||
@@ -121,6 +131,9 @@ impl Default for PoolSettings {
|
||||
healthcheck_delay: General::default_healthcheck_delay(),
|
||||
healthcheck_timeout: General::default_healthcheck_timeout(),
|
||||
ban_time: General::default_ban_time(),
|
||||
sharding_key_regex: None,
|
||||
shard_id_regex: None,
|
||||
regex_search_limit: 1000,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -300,6 +313,15 @@ impl ConnectionPool {
|
||||
healthcheck_delay: config.general.healthcheck_delay,
|
||||
healthcheck_timeout: config.general.healthcheck_timeout,
|
||||
ban_time: config.general.ban_time,
|
||||
sharding_key_regex: pool_config
|
||||
.sharding_key_regex
|
||||
.clone()
|
||||
.map(|regex| Regex::new(regex.as_str()).unwrap()),
|
||||
shard_id_regex: pool_config
|
||||
.shard_id_regex
|
||||
.clone()
|
||||
.map(|regex| Regex::new(regex.as_str()).unwrap()),
|
||||
regex_search_limit: pool_config.regex_search_limit.unwrap_or(1000),
|
||||
},
|
||||
validated: Arc::new(AtomicBool::new(false)),
|
||||
paused: Arc::new(AtomicBool::new(false)),
|
||||
|
||||
@@ -14,6 +14,7 @@ use crate::messages::BytesMutReader;
|
||||
use crate::pool::PoolSettings;
|
||||
use crate::sharding::Sharder;
|
||||
|
||||
use std::cmp;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io::Cursor;
|
||||
|
||||
@@ -114,7 +115,52 @@ impl QueryRouter {
|
||||
|
||||
let code = message_cursor.get_u8() as char;
|
||||
|
||||
// Only simple protocol supported for commands.
|
||||
// Check for any sharding regex matches in any queries
|
||||
match code as char {
|
||||
// For Parse and Query messages peek to see if they specify a shard_id as a comment early in the statement
|
||||
'P' | 'Q' => {
|
||||
if self.pool_settings.shard_id_regex.is_some()
|
||||
|| self.pool_settings.sharding_key_regex.is_some()
|
||||
{
|
||||
// Check only the first block of bytes configured by the pool settings
|
||||
let len = message_cursor.get_i32() as usize;
|
||||
let seg = cmp::min(len - 5, self.pool_settings.regex_search_limit);
|
||||
let initial_segment = String::from_utf8_lossy(&message_buffer[0..seg]);
|
||||
|
||||
// Check for a shard_id included in the query
|
||||
if let Some(shard_id_regex) = &self.pool_settings.shard_id_regex {
|
||||
let shard_id = shard_id_regex.captures(&initial_segment).and_then(|cap| {
|
||||
cap.get(1).and_then(|id| id.as_str().parse::<usize>().ok())
|
||||
});
|
||||
if let Some(shard_id) = shard_id {
|
||||
debug!("Setting shard to {:?}", shard_id);
|
||||
self.set_shard(shard_id);
|
||||
// Skip other command processing since a sharding command was found
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// Check for a sharding_key included in the query
|
||||
if let Some(sharding_key_regex) = &self.pool_settings.sharding_key_regex {
|
||||
let sharding_key =
|
||||
sharding_key_regex
|
||||
.captures(&initial_segment)
|
||||
.and_then(|cap| {
|
||||
cap.get(1).and_then(|id| id.as_str().parse::<i64>().ok())
|
||||
});
|
||||
if let Some(sharding_key) = sharding_key {
|
||||
debug!("Setting sharding_key to {:?}", sharding_key);
|
||||
self.set_sharding_key(sharding_key);
|
||||
// Skip other command processing since a sharding command was found
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Only simple protocol supported for commands processed below
|
||||
if code != 'Q' {
|
||||
return None;
|
||||
}
|
||||
@@ -192,13 +238,11 @@ impl QueryRouter {
|
||||
|
||||
match command {
|
||||
Command::SetShardingKey => {
|
||||
let sharder = Sharder::new(
|
||||
self.pool_settings.shards,
|
||||
self.pool_settings.sharding_function,
|
||||
);
|
||||
let shard = sharder.shard(value.parse::<i64>().unwrap());
|
||||
self.active_shard = Some(shard);
|
||||
value = shard.to_string();
|
||||
// TODO: some error handling here
|
||||
value = self
|
||||
.set_sharding_key(value.parse::<i64>().unwrap())
|
||||
.unwrap()
|
||||
.to_string();
|
||||
}
|
||||
|
||||
Command::SetShard => {
|
||||
@@ -465,6 +509,16 @@ impl QueryRouter {
|
||||
}
|
||||
}
|
||||
|
||||
/// Selects the active shard from a sharding key.
///
/// Builds a `Sharder` from `pool_settings.shards` and `pool_settings.sharding_function`,
/// asks it which shard `sharding_key` maps to, and passes that shard to `set_shard`.
///
/// Returns `self.active_shard` as read after the `set_shard` call (presumably the shard
/// that was just set; `set_shard` is not visible here, so confirm against its definition).
fn set_sharding_key(&mut self, sharding_key: i64) -> Option<usize> {
|
||||
// A new Sharder is constructed on every call from the current pool settings.
let sharder = Sharder::new(
|
||||
self.pool_settings.shards,
|
||||
self.pool_settings.sharding_function,
|
||||
);
|
||||
let shard = sharder.shard(sharding_key);
|
||||
self.set_shard(shard);
|
||||
self.active_shard
|
||||
}
|
||||
|
||||
/// Get the current desired server role we should be talking to.
|
||||
pub fn role(&self) -> Option<Role> {
|
||||
self.active_role
|
||||
@@ -775,6 +829,9 @@ mod test {
|
||||
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
||||
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
||||
ban_time: PoolSettings::default().ban_time,
|
||||
sharding_key_regex: None,
|
||||
shard_id_regex: None,
|
||||
regex_search_limit: 1000,
|
||||
};
|
||||
let mut qr = QueryRouter::new();
|
||||
assert_eq!(qr.active_role, None);
|
||||
@@ -820,4 +877,47 @@ mod test {
|
||||
)));
|
||||
assert_eq!(qr.role(), Role::Primary);
|
||||
}
|
||||
|
||||
/// Checks that shard-selection comments embedded at the start of a simple query
/// (`/* shard_id: N */` and `/* sharding_key: N */`) update the router's active shard.
#[test]
|
||||
fn test_regex_shard_parsing() {
|
||||
QueryRouter::setup();
|
||||
|
||||
let pool_settings = PoolSettings {
|
||||
pool_mode: PoolMode::Transaction,
|
||||
load_balancing_mode: crate::config::LoadBalancingMode::Random,
|
||||
shards: 5,
|
||||
user: crate::config::User::default(),
|
||||
default_role: Some(Role::Replica),
|
||||
query_parser_enabled: true,
|
||||
primary_reads_enabled: false,
|
||||
sharding_function: ShardingFunction::PgBigintHash,
|
||||
automatic_sharding_key: Some(String::from("id")),
|
||||
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
||||
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
||||
ban_time: PoolSettings::default().ban_time,
|
||||
// Both comment patterns are enabled; capture group 1 holds the numeric value.
sharding_key_regex: Some(Regex::new(r"/\* sharding_key: (\d+) \*/").unwrap()),
|
||||
shard_id_regex: Some(Regex::new(r"/\* shard_id: (\d+) \*/").unwrap()),
|
||||
regex_search_limit: 1000,
|
||||
};
|
||||
let mut qr = QueryRouter::new();
|
||||
qr.update_pool_settings(pool_settings.clone());
|
||||
|
||||
|
||||
// Shard should start out unset
|
||||
assert_eq!(qr.active_shard, None);
|
||||
|
||||
|
||||
// Make sure setting it works. The call itself is expected to return None;
// the effect is observed through `active_shard`.
|
||||
let q1 = simple_query("/* shard_id: 1 */ select 1 from foo;");
|
||||
assert!(qr.try_execute_command(&q1) == None);
|
||||
assert_eq!(qr.active_shard, Some(1));
|
||||
|
||||
|
||||
// And make sure changing it works
|
||||
let q2 = simple_query("/* shard_id: 0 */ select 1 from foo;");
|
||||
assert!(qr.try_execute_command(&q2) == None);
|
||||
assert_eq!(qr.active_shard, Some(0));
|
||||
|
||||
|
||||
// Validate setting by sharding key (not by shard id); the expected shard for
// key 6 is copied from the sharding.rs tests. Note that `q2` is shadowed here.
|
||||
let q2 = simple_query("/* sharding_key: 6 */ select 1 from foo;");
|
||||
assert!(qr.try_execute_command(&q2) == None);
|
||||
assert_eq!(qr.active_shard, Some(2));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user