Another example of a sharding function (#41)

* Another example of a sharding function

* tests
This commit is contained in:
Lev Kokotov
2022-02-23 11:47:24 -08:00
committed by Lev Kokotov
parent dce72ba262
commit b3c8ca4b8a
4 changed files with 99 additions and 12 deletions

View File

@@ -1,26 +1,62 @@
use sha1::{Digest, Sha1};
// https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/include/catalog/partition.h#L20
const PARTITION_HASH_SEED: u64 = 0x7A5B22367996DCFD;
#[derive(Debug, PartialEq, Copy, Clone)]
pub enum ShardingFunction {
PgBigintHash,
Sha1,
}
pub struct Sharder {
shards: usize,
sharding_function: ShardingFunction,
}
impl Sharder {
pub fn new(shards: usize) -> Sharder {
Sharder { shards: shards }
pub fn new(shards: usize, sharding_function: ShardingFunction) -> Sharder {
Sharder {
shards,
sharding_function,
}
}
pub fn shard(&self, key: i64) -> usize {
match self.sharding_function {
ShardingFunction::PgBigintHash => self.pg_bigint_hash(key),
ShardingFunction::Sha1 => self.sha1(key),
}
}
/// Hash function used by Postgres to determine which partition
/// to put the row in when using HASH(column) partitioning.
/// Source: https://github.com/postgres/postgres/blob/27b77ecf9f4d5be211900eda54d8155ada50d696/src/common/hashfn.c#L631
/// Supports only 1 bigint at the moment, but we can add more later.
pub fn pg_bigint_hash(&self, key: i64) -> usize {
fn pg_bigint_hash(&self, key: i64) -> usize {
let mut lohalf = key as u32;
let hihalf = (key >> 32) as u32;
lohalf ^= if key >= 0 { hihalf } else { !hihalf };
Self::combine(0, Self::pg_u32_hash(lohalf)) as usize % self.shards
}
/// Example of a hashing function based on SHA1.
fn sha1(&self, key: i64) -> usize {
let mut hasher = Sha1::new();
hasher.update(&key.to_string().as_bytes());
let result = hasher.finalize();
// Convert the SHA1 hash into hex so we can parse it as a large integer.
let hex = format!("{:x}", result);
// Parse the last 8 bytes as an integer (8 bytes = bigint).
let key = i64::from_str_radix(&hex[hex.len() - 8..], 16).unwrap() as usize;
key % self.shards
}
#[inline]
fn rot(x: u32, k: u32) -> u32 {
(x << k) | (x >> (32 - k))
@@ -109,36 +145,51 @@ mod test {
// confirming that we implemented Postgres BIGINT hashing correctly.
#[test]
fn test_pg_bigint_hash() {
let sharder = Sharder::new(5);
let sharder = Sharder::new(5, ShardingFunction::PgBigintHash);
let shard_0 = vec![1, 4, 5, 14, 19, 39, 40, 46, 47, 53];
for v in shard_0 {
assert_eq!(sharder.pg_bigint_hash(v), 0);
assert_eq!(sharder.shard(v), 0);
}
let shard_1 = vec![2, 3, 11, 17, 21, 23, 30, 49, 51, 54];
for v in shard_1 {
assert_eq!(sharder.pg_bigint_hash(v), 1);
assert_eq!(sharder.shard(v), 1);
}
let shard_2 = vec![6, 7, 15, 16, 18, 20, 25, 28, 34, 35];
for v in shard_2 {
assert_eq!(sharder.pg_bigint_hash(v), 2);
assert_eq!(sharder.shard(v), 2);
}
let shard_3 = vec![8, 12, 13, 22, 29, 31, 33, 36, 41, 43];
for v in shard_3 {
assert_eq!(sharder.pg_bigint_hash(v), 3);
assert_eq!(sharder.shard(v), 3);
}
let shard_4 = vec![9, 10, 24, 26, 27, 32, 37, 38, 42, 45];
for v in shard_4 {
assert_eq!(sharder.pg_bigint_hash(v), 4);
assert_eq!(sharder.shard(v), 4);
}
}
#[test]
fn test_sha1_hash() {
let sharder = Sharder::new(12, ShardingFunction::Sha1);
let ids = vec![
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
];
let shards = vec![
4, 7, 8, 3, 6, 0, 0, 10, 3, 11, 1, 7, 4, 4, 11, 2, 5, 0, 8, 3,
];
for (i, id) in ids.iter().enumerate() {
assert_eq!(sharder.shard(*id), shards[i]);
}
}
}