mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-27 10:46:30 +00:00
Another example of a sharding function (#41)
* Another example of a sharding function * tests
This commit is contained in:
10
pgcat.toml
10
pgcat.toml
@@ -96,3 +96,13 @@ query_parser_enabled = false
|
|||||||
# load balancing of read queries. Otherwise, the primary will only be used for write
|
# load balancing of read queries. Otherwise, the primary will only be used for write
|
||||||
# queries. The primary can always be explicitely selected with our custom protocol.
|
# queries. The primary can always be explicitely selected with our custom protocol.
|
||||||
primary_reads_enabled = true
|
primary_reads_enabled = true
|
||||||
|
|
||||||
|
# So what if you wanted to implement a different hashing function,
|
||||||
|
# or you've already built one and you want this pooler to use it?
|
||||||
|
#
|
||||||
|
# Current options:
|
||||||
|
#
|
||||||
|
# pg_bigint_hash: PARTITION BY HASH (Postgres hashing function)
|
||||||
|
# sha1: A hashing function based on SHA1
|
||||||
|
#
|
||||||
|
sharding_function = "pg_bigint_hash"
|
||||||
|
|||||||
@@ -118,6 +118,7 @@ pub struct QueryRouter {
|
|||||||
pub default_role: String,
|
pub default_role: String,
|
||||||
pub query_parser_enabled: bool,
|
pub query_parser_enabled: bool,
|
||||||
pub primary_reads_enabled: bool,
|
pub primary_reads_enabled: bool,
|
||||||
|
pub sharding_function: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for QueryRouter {
|
impl Default for QueryRouter {
|
||||||
@@ -126,6 +127,7 @@ impl Default for QueryRouter {
|
|||||||
default_role: String::from("any"),
|
default_role: String::from("any"),
|
||||||
query_parser_enabled: false,
|
query_parser_enabled: false,
|
||||||
primary_reads_enabled: true,
|
primary_reads_enabled: true,
|
||||||
|
sharding_function: "pg_bigint_hash".to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -159,6 +161,8 @@ impl Config {
|
|||||||
self.general.healthcheck_timeout
|
self.general.healthcheck_timeout
|
||||||
);
|
);
|
||||||
info!("Connection timeout: {}ms", self.general.connect_timeout);
|
info!("Connection timeout: {}ms", self.general.connect_timeout);
|
||||||
|
info!("Sharding function: {}", self.query_router.sharding_function);
|
||||||
|
info!("Number of shards: {}", self.shards.len());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -193,6 +197,18 @@ pub async fn parse(path: &str) -> Result<(), Error> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
match config.query_router.sharding_function.as_ref() {
|
||||||
|
"pg_bigint_hash" => (),
|
||||||
|
"sha1" => (),
|
||||||
|
_ => {
|
||||||
|
error!(
|
||||||
|
"Supported sharding functions are: 'pg_bigint_hash', 'sha1', got: '{}'",
|
||||||
|
config.query_router.sharding_function
|
||||||
|
);
|
||||||
|
return Err(Error::BadConfig);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Quick config sanity check.
|
// Quick config sanity check.
|
||||||
for shard in &config.shards {
|
for shard in &config.shards {
|
||||||
// We use addresses as unique identifiers,
|
// We use addresses as unique identifiers,
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
use crate::config::{get_config, Role};
|
use crate::config::{get_config, Role};
|
||||||
use crate::sharding::Sharder;
|
use crate::sharding::{Sharder, ShardingFunction};
|
||||||
/// Route queries automatically based on explicitely requested
|
/// Route queries automatically based on explicitely requested
|
||||||
/// or implied query characteristics.
|
/// or implied query characteristics.
|
||||||
use bytes::{Buf, BytesMut};
|
use bytes::{Buf, BytesMut};
|
||||||
@@ -48,6 +48,9 @@ pub struct QueryRouter {
|
|||||||
|
|
||||||
// Should we try to parse queries?
|
// Should we try to parse queries?
|
||||||
query_parser_enabled: bool,
|
query_parser_enabled: bool,
|
||||||
|
|
||||||
|
// Which sharding function are we using?
|
||||||
|
sharding_function: ShardingFunction,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl QueryRouter {
|
impl QueryRouter {
|
||||||
@@ -76,6 +79,12 @@ impl QueryRouter {
|
|||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let sharding_function = match config.query_router.sharding_function.as_ref() {
|
||||||
|
"pg_bigint_hash" => ShardingFunction::PgBigintHash,
|
||||||
|
"sha1" => ShardingFunction::Sha1,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
QueryRouter {
|
QueryRouter {
|
||||||
default_server_role: default_server_role,
|
default_server_role: default_server_role,
|
||||||
shards: config.shards.len(),
|
shards: config.shards.len(),
|
||||||
@@ -84,6 +93,7 @@ impl QueryRouter {
|
|||||||
active_shard: None,
|
active_shard: None,
|
||||||
primary_reads_enabled: config.query_router.primary_reads_enabled,
|
primary_reads_enabled: config.query_router.primary_reads_enabled,
|
||||||
query_parser_enabled: config.query_router.query_parser_enabled,
|
query_parser_enabled: config.query_router.query_parser_enabled,
|
||||||
|
sharding_function,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,8 +149,8 @@ impl QueryRouter {
|
|||||||
|
|
||||||
match command {
|
match command {
|
||||||
Command::SetShardingKey => {
|
Command::SetShardingKey => {
|
||||||
let sharder = Sharder::new(self.shards);
|
let sharder = Sharder::new(self.shards, self.sharding_function);
|
||||||
let shard = sharder.pg_bigint_hash(value.parse::<i64>().unwrap());
|
let shard = sharder.shard(value.parse::<i64>().unwrap());
|
||||||
self.active_shard = Some(shard);
|
self.active_shard = Some(shard);
|
||||||
value = shard.to_string();
|
value = shard.to_string();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,26 +1,62 @@
|
|||||||
|
use sha1::{Digest, Sha1};
|
||||||
|
|
||||||
// https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/include/catalog/partition.h#L20
|
// https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/include/catalog/partition.h#L20
|
||||||
const PARTITION_HASH_SEED: u64 = 0x7A5B22367996DCFD;
|
const PARTITION_HASH_SEED: u64 = 0x7A5B22367996DCFD;
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||||
|
pub enum ShardingFunction {
|
||||||
|
PgBigintHash,
|
||||||
|
Sha1,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Sharder {
|
pub struct Sharder {
|
||||||
shards: usize,
|
shards: usize,
|
||||||
|
sharding_function: ShardingFunction,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Sharder {
|
impl Sharder {
|
||||||
pub fn new(shards: usize) -> Sharder {
|
pub fn new(shards: usize, sharding_function: ShardingFunction) -> Sharder {
|
||||||
Sharder { shards: shards }
|
Sharder {
|
||||||
|
shards,
|
||||||
|
sharding_function,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn shard(&self, key: i64) -> usize {
|
||||||
|
match self.sharding_function {
|
||||||
|
ShardingFunction::PgBigintHash => self.pg_bigint_hash(key),
|
||||||
|
ShardingFunction::Sha1 => self.sha1(key),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Hash function used by Postgres to determine which partition
|
/// Hash function used by Postgres to determine which partition
|
||||||
/// to put the row in when using HASH(column) partitioning.
|
/// to put the row in when using HASH(column) partitioning.
|
||||||
/// Source: https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/common/hashfn.c#L631
|
/// Source: https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/common/hashfn.c#L631
|
||||||
/// Supports only 1 bigint at the moment, but we can add more later.
|
/// Supports only 1 bigint at the moment, but we can add more later.
|
||||||
pub fn pg_bigint_hash(&self, key: i64) -> usize {
|
fn pg_bigint_hash(&self, key: i64) -> usize {
|
||||||
let mut lohalf = key as u32;
|
let mut lohalf = key as u32;
|
||||||
let hihalf = (key >> 32) as u32;
|
let hihalf = (key >> 32) as u32;
|
||||||
lohalf ^= if key >= 0 { hihalf } else { !hihalf };
|
lohalf ^= if key >= 0 { hihalf } else { !hihalf };
|
||||||
Self::combine(0, Self::pg_u32_hash(lohalf)) as usize % self.shards
|
Self::combine(0, Self::pg_u32_hash(lohalf)) as usize % self.shards
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Example of a hashing function based on SHA1.
|
||||||
|
fn sha1(&self, key: i64) -> usize {
|
||||||
|
let mut hasher = Sha1::new();
|
||||||
|
|
||||||
|
hasher.update(&key.to_string().as_bytes());
|
||||||
|
|
||||||
|
let result = hasher.finalize();
|
||||||
|
|
||||||
|
// Convert the SHA1 hash into hex so we can parse it as a large integer.
|
||||||
|
let hex = format!("{:x}", result);
|
||||||
|
|
||||||
|
// Parse the last 8 bytes as an integer (8 bytes = bigint).
|
||||||
|
let key = i64::from_str_radix(&hex[hex.len() - 8..], 16).unwrap() as usize;
|
||||||
|
|
||||||
|
key % self.shards
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn rot(x: u32, k: u32) -> u32 {
|
fn rot(x: u32, k: u32) -> u32 {
|
||||||
(x << k) | (x >> (32 - k))
|
(x << k) | (x >> (32 - k))
|
||||||
@@ -109,36 +145,51 @@ mod test {
|
|||||||
// confirming that we implemented Postgres BIGINT hashing correctly.
|
// confirming that we implemented Postgres BIGINT hashing correctly.
|
||||||
#[test]
|
#[test]
|
||||||
fn test_pg_bigint_hash() {
|
fn test_pg_bigint_hash() {
|
||||||
let sharder = Sharder::new(5);
|
let sharder = Sharder::new(5, ShardingFunction::PgBigintHash);
|
||||||
|
|
||||||
let shard_0 = vec![1, 4, 5, 14, 19, 39, 40, 46, 47, 53];
|
let shard_0 = vec![1, 4, 5, 14, 19, 39, 40, 46, 47, 53];
|
||||||
|
|
||||||
for v in shard_0 {
|
for v in shard_0 {
|
||||||
assert_eq!(sharder.pg_bigint_hash(v), 0);
|
assert_eq!(sharder.shard(v), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_1 = vec![2, 3, 11, 17, 21, 23, 30, 49, 51, 54];
|
let shard_1 = vec![2, 3, 11, 17, 21, 23, 30, 49, 51, 54];
|
||||||
|
|
||||||
for v in shard_1 {
|
for v in shard_1 {
|
||||||
assert_eq!(sharder.pg_bigint_hash(v), 1);
|
assert_eq!(sharder.shard(v), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_2 = vec![6, 7, 15, 16, 18, 20, 25, 28, 34, 35];
|
let shard_2 = vec![6, 7, 15, 16, 18, 20, 25, 28, 34, 35];
|
||||||
|
|
||||||
for v in shard_2 {
|
for v in shard_2 {
|
||||||
assert_eq!(sharder.pg_bigint_hash(v), 2);
|
assert_eq!(sharder.shard(v), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_3 = vec![8, 12, 13, 22, 29, 31, 33, 36, 41, 43];
|
let shard_3 = vec![8, 12, 13, 22, 29, 31, 33, 36, 41, 43];
|
||||||
|
|
||||||
for v in shard_3 {
|
for v in shard_3 {
|
||||||
assert_eq!(sharder.pg_bigint_hash(v), 3);
|
assert_eq!(sharder.shard(v), 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
let shard_4 = vec![9, 10, 24, 26, 27, 32, 37, 38, 42, 45];
|
let shard_4 = vec![9, 10, 24, 26, 27, 32, 37, 38, 42, 45];
|
||||||
|
|
||||||
for v in shard_4 {
|
for v in shard_4 {
|
||||||
assert_eq!(sharder.pg_bigint_hash(v), 4);
|
assert_eq!(sharder.shard(v), 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_sha1_hash() {
|
||||||
|
let sharder = Sharder::new(12, ShardingFunction::Sha1);
|
||||||
|
let ids = vec![
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
||||||
|
];
|
||||||
|
let shards = vec![
|
||||||
|
4, 7, 8, 3, 6, 0, 0, 10, 3, 11, 1, 7, 4, 4, 11, 2, 5, 0, 8, 3,
|
||||||
|
];
|
||||||
|
|
||||||
|
for (i, id) in ids.iter().enumerate() {
|
||||||
|
assert_eq!(sharder.shard(*id), shards[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user