mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-27 10:46:30 +00:00
Automatic sharding for SELECT v2 (#337)
* More comprehensive read sharding support * A few fixes * fq * comment * wildcard
This commit is contained in:
@@ -102,7 +102,7 @@ primary_reads_enabled = true
|
|||||||
sharding_function = "pg_bigint_hash"
|
sharding_function = "pg_bigint_hash"
|
||||||
|
|
||||||
# Automatically parse this from queries and route queries to the right shard!
|
# Automatically parse this from queries and route queries to the right shard!
|
||||||
automatic_sharding_key = "id"
|
automatic_sharding_key = "data.id"
|
||||||
|
|
||||||
# Idle timeout can be overwritten in the pool
|
# Idle timeout can be overwritten in the pool
|
||||||
idle_timeout = 40000
|
idle_timeout = 40000
|
||||||
|
|||||||
@@ -374,7 +374,7 @@ impl Pool {
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn validate(&self) -> Result<(), Error> {
|
pub fn validate(&mut self) -> Result<(), Error> {
|
||||||
match self.default_role.as_ref() {
|
match self.default_role.as_ref() {
|
||||||
"any" => (),
|
"any" => (),
|
||||||
"primary" => (),
|
"primary" => (),
|
||||||
@@ -414,6 +414,25 @@ impl Pool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.automatic_sharding_key = match &self.automatic_sharding_key {
|
||||||
|
Some(key) => {
|
||||||
|
// No quotes in the key so we don't have to compare quoted
|
||||||
|
// to unquoted idents.
|
||||||
|
let key = key.replace("\"", "");
|
||||||
|
|
||||||
|
if key.split(".").count() != 2 {
|
||||||
|
error!(
|
||||||
|
"automatic_sharding_key '{}' must be fully qualified, e.g. t.{}`",
|
||||||
|
key, key
|
||||||
|
);
|
||||||
|
return Err(Error::BadConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(key)
|
||||||
|
}
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ use log::{debug, error};
|
|||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
use regex::{Regex, RegexSet};
|
use regex::{Regex, RegexSet};
|
||||||
use sqlparser::ast::Statement::{Query, StartTransaction};
|
use sqlparser::ast::Statement::{Query, StartTransaction};
|
||||||
use sqlparser::ast::{BinaryOperator, Expr, SetExpr, Value};
|
use sqlparser::ast::{
|
||||||
|
BinaryOperator, Expr, Ident, JoinConstraint, JoinOperator, SetExpr, TableFactor, Value,
|
||||||
|
};
|
||||||
use sqlparser::dialect::PostgreSqlDialect;
|
use sqlparser::dialect::PostgreSqlDialect;
|
||||||
use sqlparser::parser::Parser;
|
use sqlparser::parser::Parser;
|
||||||
|
|
||||||
@@ -403,20 +405,67 @@ impl QueryRouter {
|
|||||||
|
|
||||||
/// A `selection` is the `WHERE` clause. This parses
|
/// A `selection` is the `WHERE` clause. This parses
|
||||||
/// the clause and extracts the sharding key, if present.
|
/// the clause and extracts the sharding key, if present.
|
||||||
fn selection_parser(&self, expr: &Expr) -> Vec<i64> {
|
fn selection_parser(&self, expr: &Expr, table_names: &Vec<Vec<Ident>>) -> Vec<i64> {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
let mut found = false;
|
let mut found = false;
|
||||||
|
|
||||||
|
let sharding_key = self
|
||||||
|
.pool_settings
|
||||||
|
.automatic_sharding_key
|
||||||
|
.as_ref()
|
||||||
|
.unwrap()
|
||||||
|
.split(".")
|
||||||
|
.map(|ident| Ident::new(ident))
|
||||||
|
.collect::<Vec<Ident>>();
|
||||||
|
|
||||||
|
// Sharding key must be always fully qualified
|
||||||
|
assert_eq!(sharding_key.len(), 2);
|
||||||
|
|
||||||
// This parses `sharding_key = 5`. But it's technically
|
// This parses `sharding_key = 5`. But it's technically
|
||||||
// legal to write `5 = sharding_key`. I don't judge the people
|
// legal to write `5 = sharding_key`. I don't judge the people
|
||||||
// who do that, but I think ORMs will still use the first variant,
|
// who do that, but I think ORMs will still use the first variant,
|
||||||
// so we can leave the second as a TODO.
|
// so we can leave the second as a TODO.
|
||||||
if let Expr::BinaryOp { left, op, right } = expr {
|
if let Expr::BinaryOp { left, op, right } = expr {
|
||||||
match &**left {
|
match &**left {
|
||||||
Expr::BinaryOp { .. } => result.extend(self.selection_parser(left)),
|
Expr::BinaryOp { .. } => result.extend(self.selection_parser(left, table_names)),
|
||||||
Expr::Identifier(ident) => {
|
Expr::Identifier(ident) => {
|
||||||
found =
|
// Only if we're dealing with only one table
|
||||||
ident.value == *self.pool_settings.automatic_sharding_key.as_ref().unwrap();
|
// and there is no ambiguity
|
||||||
|
if &ident.value == &sharding_key[1].value {
|
||||||
|
// Sharding key is unique enough, don't worry about
|
||||||
|
// table names.
|
||||||
|
if &sharding_key[0].value == "*" {
|
||||||
|
found = true;
|
||||||
|
} else if table_names.len() == 1 {
|
||||||
|
let table = &table_names[0];
|
||||||
|
|
||||||
|
if table.len() == 1 {
|
||||||
|
// Table is not fully qualified, e.g.
|
||||||
|
// SELECT * FROM t WHERE sharding_key = 5
|
||||||
|
// Make sure the table name from the sharding key matches
|
||||||
|
// the table name from the query.
|
||||||
|
found = &sharding_key[0].value == &table[0].value;
|
||||||
|
} else if table.len() == 2 {
|
||||||
|
// Table name is fully qualified with the schema: e.g.
|
||||||
|
// SELECT * FROM public.t WHERE sharding_key = 5
|
||||||
|
// Ignore the schema (TODO: at some point, we want schema support)
|
||||||
|
// and use the table name only.
|
||||||
|
found = &sharding_key[0].value == &table[1].value;
|
||||||
|
} else {
|
||||||
|
debug!("Got table name with more than two idents, which is not possible");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Expr::CompoundIdentifier(idents) => {
|
||||||
|
// The key is fully qualified in the query,
|
||||||
|
// it will exist or Postgres will throw an error.
|
||||||
|
if idents.len() == 2 {
|
||||||
|
found = &sharding_key[0].value == &idents[0].value
|
||||||
|
&& &sharding_key[1].value == &idents[1].value;
|
||||||
|
}
|
||||||
|
// TODO: key can have schema as well, e.g. public.data.id (len == 3)
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
};
|
};
|
||||||
@@ -433,7 +482,7 @@ impl QueryRouter {
|
|||||||
};
|
};
|
||||||
|
|
||||||
match &**right {
|
match &**right {
|
||||||
Expr::BinaryOp { .. } => result.extend(self.selection_parser(right)),
|
Expr::BinaryOp { .. } => result.extend(self.selection_parser(right, table_names)),
|
||||||
Expr::Value(Value::Number(value, ..)) => {
|
Expr::Value(Value::Number(value, ..)) => {
|
||||||
if found {
|
if found {
|
||||||
match value.parse::<i64>() {
|
match value.parse::<i64>() {
|
||||||
@@ -456,6 +505,7 @@ impl QueryRouter {
|
|||||||
/// Try to figure out which shard the query should go to.
|
/// Try to figure out which shard the query should go to.
|
||||||
fn infer_shard(&self, query: &sqlparser::ast::Query) -> Option<usize> {
|
fn infer_shard(&self, query: &sqlparser::ast::Query) -> Option<usize> {
|
||||||
let mut shards = BTreeSet::new();
|
let mut shards = BTreeSet::new();
|
||||||
|
let mut exprs = Vec::new();
|
||||||
|
|
||||||
match &*query.body {
|
match &*query.body {
|
||||||
SetExpr::Query(query) => {
|
SetExpr::Query(query) => {
|
||||||
@@ -467,27 +517,75 @@ impl QueryRouter {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SELECT * FROM ...
|
||||||
|
// We understand that pretty well.
|
||||||
SetExpr::Select(select) => {
|
SetExpr::Select(select) => {
|
||||||
|
// Collect all table names from the query.
|
||||||
|
let mut table_names = Vec::new();
|
||||||
|
|
||||||
|
for table in select.from.iter() {
|
||||||
|
match &table.relation {
|
||||||
|
TableFactor::Table { name, .. } => {
|
||||||
|
table_names.push(name.0.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get table names from all the joins.
|
||||||
|
for join in table.joins.iter() {
|
||||||
|
match &join.relation {
|
||||||
|
TableFactor::Table { name, .. } => {
|
||||||
|
table_names.push(name.0.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
|
||||||
|
// We can filter results based on join conditions, e.g.
|
||||||
|
// SELECT * FROM t INNER JOIN B ON B.sharding_key = 5;
|
||||||
|
match &join.join_operator {
|
||||||
|
JoinOperator::Inner(inner_join) => match &inner_join {
|
||||||
|
JoinConstraint::On(expr) => {
|
||||||
|
// Parse the selection criteria later.
|
||||||
|
exprs.push(expr.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
},
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the actual "FROM ..."
|
||||||
match &select.selection {
|
match &select.selection {
|
||||||
Some(selection) => {
|
Some(selection) => {
|
||||||
let sharding_keys = self.selection_parser(selection);
|
exprs.push(selection.clone());
|
||||||
|
|
||||||
// TODO: Add support for prepared statements here.
|
|
||||||
// This should just give us the position of the value in the `B` message.
|
|
||||||
|
|
||||||
let sharder = Sharder::new(
|
|
||||||
self.pool_settings.shards,
|
|
||||||
self.pool_settings.sharding_function,
|
|
||||||
);
|
|
||||||
|
|
||||||
for value in sharding_keys {
|
|
||||||
let shard = sharder.shard(value);
|
|
||||||
shards.insert(shard);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
None => (),
|
None => (),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Look for sharding keys in either the join condition
|
||||||
|
// or the selection.
|
||||||
|
for expr in exprs.iter() {
|
||||||
|
let sharding_keys = self.selection_parser(expr, &table_names);
|
||||||
|
|
||||||
|
// TODO: Add support for prepared statements here.
|
||||||
|
// This should just give us the position of the value in the `B` message.
|
||||||
|
|
||||||
|
let sharder = Sharder::new(
|
||||||
|
self.pool_settings.shards,
|
||||||
|
self.pool_settings.sharding_function,
|
||||||
|
);
|
||||||
|
|
||||||
|
for value in sharding_keys {
|
||||||
|
let shard = sharder.shard(value);
|
||||||
|
shards.insert(shard);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
};
|
};
|
||||||
@@ -825,7 +923,7 @@ mod test {
|
|||||||
query_parser_enabled: true,
|
query_parser_enabled: true,
|
||||||
primary_reads_enabled: false,
|
primary_reads_enabled: false,
|
||||||
sharding_function: ShardingFunction::PgBigintHash,
|
sharding_function: ShardingFunction::PgBigintHash,
|
||||||
automatic_sharding_key: Some(String::from("id")),
|
automatic_sharding_key: Some(String::from("test.id")),
|
||||||
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
||||||
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
||||||
ban_time: PoolSettings::default().ban_time,
|
ban_time: PoolSettings::default().ban_time,
|
||||||
@@ -854,11 +952,6 @@ mod test {
|
|||||||
let q2 = simple_query("SET SERVER ROLE TO 'default'");
|
let q2 = simple_query("SET SERVER ROLE TO 'default'");
|
||||||
assert!(qr.try_execute_command(&q2) != None);
|
assert!(qr.try_execute_command(&q2) != None);
|
||||||
assert_eq!(qr.active_role.unwrap(), pool_settings.default_role);
|
assert_eq!(qr.active_role.unwrap(), pool_settings.default_role);
|
||||||
|
|
||||||
// Here we go :)
|
|
||||||
let q3 = simple_query("SELECT * FROM test WHERE id = 5 AND values IN (1, 2, 3)");
|
|
||||||
assert!(qr.infer(&q3));
|
|
||||||
assert_eq!(qr.shard(), 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -891,7 +984,7 @@ mod test {
|
|||||||
query_parser_enabled: true,
|
query_parser_enabled: true,
|
||||||
primary_reads_enabled: false,
|
primary_reads_enabled: false,
|
||||||
sharding_function: ShardingFunction::PgBigintHash,
|
sharding_function: ShardingFunction::PgBigintHash,
|
||||||
automatic_sharding_key: Some(String::from("id")),
|
automatic_sharding_key: None,
|
||||||
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
healthcheck_delay: PoolSettings::default().healthcheck_delay,
|
||||||
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
healthcheck_timeout: PoolSettings::default().healthcheck_timeout,
|
||||||
ban_time: PoolSettings::default().ban_time,
|
ban_time: PoolSettings::default().ban_time,
|
||||||
@@ -920,4 +1013,56 @@ mod test {
|
|||||||
assert!(qr.try_execute_command(&q2) == None);
|
assert!(qr.try_execute_command(&q2) == None);
|
||||||
assert_eq!(qr.active_shard, Some(2));
|
assert_eq!(qr.active_shard, Some(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_automatic_sharding_key() {
|
||||||
|
QueryRouter::setup();
|
||||||
|
|
||||||
|
let mut qr = QueryRouter::new();
|
||||||
|
qr.pool_settings.automatic_sharding_key = Some("data.id".to_string());
|
||||||
|
qr.pool_settings.shards = 3;
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query("SELECT * FROM data WHERE id = 5")));
|
||||||
|
assert_eq!(qr.shard(), 2);
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
"SELECT one, two, three FROM public.data WHERE id = 6"
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 0);
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
"SELECT * FROM data
|
||||||
|
INNER JOIN t2 ON data.id = 5
|
||||||
|
AND t2.data_id = data.id
|
||||||
|
WHERE data.id = 5"
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 2);
|
||||||
|
|
||||||
|
// Shard did not move because we couldn't determine the sharding key since it could be ambiguous
|
||||||
|
// in the query.
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
"SELECT * FROM t2 INNER JOIN data ON id = 6 AND data.id = t2.data_id"
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 2);
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
r#"SELECT * FROM "public"."data" WHERE "id" = 6"#
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 0);
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
r#"SELECT * FROM "public"."data" WHERE "data"."id" = 5"#
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 2);
|
||||||
|
|
||||||
|
// Super unique sharding key
|
||||||
|
qr.pool_settings.automatic_sharding_key = Some("*.unique_enough_column_name".to_string());
|
||||||
|
assert!(qr.infer(&simple_query(
|
||||||
|
"SELECT * FROM table_x WHERE unique_enough_column_name = 6"
|
||||||
|
)));
|
||||||
|
assert_eq!(qr.shard(), 0);
|
||||||
|
|
||||||
|
assert!(qr.infer(&simple_query("SELECT * FROM table_y WHERE another_key = 5")));
|
||||||
|
assert_eq!(qr.shard(), 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user