mirror of
https://github.com/postgresml/pgcat.git
synced 2026-03-23 09:26:30 +00:00
We identified a bug where RELOAD fails to update the pools. To reproduce, you need to start at some config state, modify that state a bit, reload, revert the configs back to the original state, and reload again. The last reload will fail to update the pool because PgCat "thinks" the pool state didn't change. This is because we use a HashSet to keep track of config hashes but we never remove values from it. Say we start with State A, then modify the pool configs to State B and reload. Now the POOL_HASHES struct has both State A and State B. Attempting to go back to State A will encounter a hashset hit, which PgCat interprets as "Configs are the same, no need to reload pools". We fix this by attaching a config_hash value to the ConnectionPool object, which we calculate when we create the pool. This eliminates the need for a global variable. One shortcoming here is that changing any config under one user in the pool will trigger a reload for the entire pool (which is fine, I think).
166 lines
5.2 KiB
Ruby
166 lines
5.2 KiB
Ruby
# frozen_string_literal: true
|
|
require_relative 'spec_helper'
|
|
|
|
# Integration specs for the pooler started by
# Helpers::Pgcat.single_shard_setup("sharded_db", 5): SELECTs sent through
# PgCat should be spread roughly evenly over every database instance that is
# currently reachable.
describe "Random Load Balancing" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 5) }

  after do
    processes.all_databases.map(&:reset)
    processes.pgcat.shutdown
  end

  # Opens a new client connection to the pooler as "sharding_user".
  def connect_to_pool
    PG.connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
  end

  # Closes +conn+ unless it is nil or already finished, so an example never
  # leaves its client socket open after it is done with it.
  def close_connection(conn)
    conn.close if conn && !conn.finished?
  end

  context "under regular circumstances" do
    it "balances query volume between all instances" do
      conn = connect_to_pool

      query_count = QUERY_COUNT
      expected_share = query_count / processes.all_databases.count
      failed_count = 0

      query_count.times do
        conn.async_exec("SELECT 1 + 2")
      rescue
        failed_count += 1
      end

      expect(failed_count).to eq(0)
      processes.all_databases.map(&:count_select_1_plus_2).each do |instance_share|
        expect(instance_share).to be_within(expected_share * MARGIN_OF_ERROR).of(expected_share)
      end
    ensure
      # Runs even when an expectation above fails.
      close_connection(conn)
    end
  end

  context "when some replicas are down" do
    it "balances query volume between working instances" do
      conn = connect_to_pool
      # Two replicas are taken down below, so the load is expected to be
      # shared by the remaining instances only.
      expected_share = QUERY_COUNT / (processes.all_databases.count - 2)
      failed_count = 0

      processes[:replicas][0].take_down do
        processes[:replicas][1].take_down do
          QUERY_COUNT.times do
            conn.async_exec("SELECT 1 + 2")
          rescue
            # Release the connection that just failed before replacing it;
            # otherwise the old handle is dropped without ever being closed.
            close_connection(conn)
            conn = connect_to_pool
            failed_count += 1
          end
        end
      end

      # A small number of failures is tolerated (presumably one per downed
      # replica before the pooler stops routing to it -- confirm).
      expect(failed_count).to be <= 2
      processes.all_databases.each do |instance|
        queries_routed = instance.count_select_1_plus_2
        if processes.replicas[0..1].include?(instance)
          expect(queries_routed).to eq(0)
        else
          expect(queries_routed).to be_within(expected_share * MARGIN_OF_ERROR).of(expected_share)
        end
      end
    ensure
      close_connection(conn)
    end
  end
end
|
|
|
|
# Integration specs for the pooler started by
# Helpers::Pgcat.single_shard_setup("sharded_db", 1, "transaction", "loc").
# NOTE(review): the trailing arguments are presumably the pool mode and the
# load-balancing mode -- confirm against Helpers::Pgcat.
describe "Least Outstanding Queries Load Balancing" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 1, "transaction", "loc") }

  after do
    processes.all_databases.map(&:reset)
    processes.pgcat.shutdown
  end

  # Opens a new client connection to the pooler as "sharding_user".
  def connect_to_pool
    PG.connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
  end

  # Closes +conn+ unless it is nil or already finished, so an example never
  # leaves its client socket open after it is done with it.
  def close_connection(conn)
    conn.close if conn && !conn.finished?
  end

  context "under homogenous load" do
    it "balances query volume between all instances" do
      conn = connect_to_pool

      query_count = QUERY_COUNT
      expected_share = query_count / processes.all_databases.count
      failed_count = 0

      query_count.times do
        conn.async_exec("SELECT 1 + 2")
      rescue
        failed_count += 1
      end

      expect(failed_count).to eq(0)
      processes.all_databases.map(&:count_select_1_plus_2).each do |instance_share|
        expect(instance_share).to be_within(expected_share * MARGIN_OF_ERROR).of(expected_share)
      end
    ensure
      # Runs even when an expectation above fails.
      close_connection(conn)
    end
  end

  context "under heterogeneous load" do
    # Pending example (xit): RSpec reports it but does not execute the body.
    xit "balances query volume between all instances based on how busy they are" do
      slow_query_count = 2
      # Each thread opens its own connection, issues BEGIN and leaves the
      # transaction open. The connection is returned as the thread's value so
      # it stays referenced (and therefore open) until it is closed explicitly
      # in the ensure clause, instead of being eligible for garbage collection
      # while the measurement below is still running.
      threads = Array.new(slow_query_count) do
        Thread.new do
          busy_conn = connect_to_pool
          busy_conn.async_exec("BEGIN")
          busy_conn
        end
      end

      conn = connect_to_pool

      query_count = QUERY_COUNT
      expected_share = query_count / (processes.all_databases.count - slow_query_count)
      failed_count = 0

      query_count.times do
        conn.async_exec("SELECT 1 + 2")
      rescue
        failed_count += 1
      end

      expect(failed_count).to eq(0)
      # Under LOQ, we expect the instances kept busy by the open transactions
      # above to get no selects.
      # NOTE(review): the previous comment mentioned a slow pg_sleep, but the
      # threads only issue BEGIN -- confirm which was intended.
      expect(
        processes.
          all_databases.
          map(&:count_select_1_plus_2).
          count { |instance_share| instance_share == 0 }
      ).to eq(slow_query_count)

      # We also expect the quick queries to be spread across
      # the idle servers only
      processes.
        all_databases.
        map(&:count_select_1_plus_2).
        reject { |instance_share| instance_share == 0 }.
        each do |instance_share|
          expect(instance_share).to be_within(expected_share * MARGIN_OF_ERROR).of(expected_share)
        end
    ensure
      close_connection(conn)
      # Thread#value joins the thread (re-raising anything it raised) and
      # returns the connection it opened, which is then closed.
      threads&.each { |thread| close_connection(thread.value) }
    end
  end

  context "when some replicas are down" do
    it "balances query volume between working instances" do
      conn = connect_to_pool
      # Two replicas are taken down below, so the load is expected to be
      # shared by the remaining instances only.
      expected_share = QUERY_COUNT / (processes.all_databases.count - 2)
      failed_count = 0

      processes[:replicas][0].take_down do
        processes[:replicas][1].take_down do
          QUERY_COUNT.times do
            conn.async_exec("SELECT 1 + 2")
          rescue
            # Release the connection that just failed before replacing it;
            # otherwise the old handle is dropped without ever being closed.
            close_connection(conn)
            conn = connect_to_pool
            failed_count += 1
          end
        end
      end

      # A small number of failures is tolerated (presumably one per downed
      # replica before the pooler stops routing to it -- confirm).
      expect(failed_count).to be <= 2
      processes.all_databases.each do |instance|
        queries_routed = instance.count_select_1_plus_2
        if processes.replicas[0..1].include?(instance)
          expect(queries_routed).to eq(0)
        else
          expect(queries_routed).to be_within(expected_share * MARGIN_OF_ERROR).of(expected_share)
        end
      end
    ensure
      close_connection(conn)
    end
  end
end
|
|
|