2022-08-30 11:14:53 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
require_relative 'spec_helper'
|
|
|
|
|
|
|
|
|
|
describe "Miscellaneous" do
|
|
|
|
|
# One pgcat in front of a single shard, pool size 5, shared by most examples.
let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 5) }

after do
  # Restore every backend database to a clean state, then stop the pooler so
  # the next example starts fresh.
  processes.all_databases.each(&:reset)
  processes.pgcat.shutdown
end
|
|
|
|
|
|
2023-02-21 21:53:10 -06:00
|
|
|
context "when adding then removing instance using RELOAD" do
  it "works correctly" do
    admin_conn = PG::connect(processes.pgcat.admin_connection_string)

    # Helper: the servers configured for shard 0 of sharded_db.
    servers = ->(configs) { configs["pools"]["sharded_db"]["shards"]["0"]["servers"] }

    # Baseline: SHOW DATABASES must match the configured server count.
    current_configs = processes.pgcat.current_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)

    # Clone the last replica, point it at loopback, and append it to shard 0.
    extra_replica = servers.call(current_configs).last.clone
    extra_replica[0] = "127.0.0.1"
    servers.call(current_configs) << extra_replica

    processes.pgcat.update_config(current_configs) # with replica added
    processes.pgcat.reload_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)

    # Drop the replica we just added and reload once more.
    servers.call(current_configs).pop
    processes.pgcat.update_config(current_configs) # with replica removed again
    processes.pgcat.reload_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)
  end
end
|
|
|
|
|
|
|
|
|
|
context "when removing then adding instance back using RELOAD" do
  it "works correctly" do
    admin_conn = PG::connect(processes.pgcat.admin_connection_string)

    # Helper: the servers configured for shard 0 of sharded_db.
    servers = ->(configs) { configs["pools"]["sharded_db"]["shards"]["0"]["servers"] }

    # Baseline: SHOW DATABASES must match the configured server count.
    current_configs = processes.pgcat.current_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)

    # Drop the last replica and reload.
    removed_replica = servers.call(current_configs).pop
    processes.pgcat.update_config(current_configs) # with replica removed
    processes.pgcat.reload_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)

    # Put the same replica back and reload again.
    servers.call(current_configs) << removed_replica
    processes.pgcat.update_config(current_configs) # with replica added again
    processes.pgcat.reload_config
    expect(admin_conn.async_exec("SHOW DATABASES").count).to eq(servers.call(current_configs).count)
  end
end
|
|
|
|
|
|
2023-02-08 11:35:38 -06:00
|
|
|
describe "TCP Keepalives" do
  # Ideally we would block TCP traffic to the database using iptables to mimic
  # a passive failure (connection dropped without a RST packet), but we cannot
  # do that in CircleCI because iptables requires the NET_ADMIN capability,
  # which we cannot enable there. Toxiproxy won't work either because it does
  # not block keepalives, so our best bet is to query the OS keepalive params
  # set on the socket.

  # Runs `ss` against every backend database port and returns the output rows
  # with the header line removed. Each row carries the socket's keepalive
  # timer, e.g.:
  #   Recv-Q Send-Q Local Address:Port Peer Address:Port Process
  #   0 0 127.0.0.1:60526 127.0.0.1:18432 timer:(keepalive,1min59sec,0)
  #   0 0 127.0.0.1:60664 127.0.0.1:19432 timer:(keepalive,4.123ms,0)
  def established_socket_rows
    filter = processes.all_databases.map { |db| "dport = :#{db.port}" }.join(" or ")
    rows = `ss -t4 state established -o -at '( #{filter} )'`.lines
    rows.drop(1)
  end

  context "default settings" do
    it "applies default keepalive settings" do
      # We can only verify the keepalives_idle parameter via ss, but that's
      # good enough.
      established_socket_rows.each do |row|
        expect(row).to match(/timer:\(keepalive,.*ms,0\)/)
      end
    end
  end

  context "changed settings" do
    it "applies keepalive settings from config" do
      new_configs = processes.pgcat.current_config
      new_configs["general"]["tcp_keepalives_idle"] = 120
      new_configs["general"]["tcp_keepalives_count"] = 1
      new_configs["general"]["tcp_keepalives_interval"] = 1
      processes.pgcat.update_config(new_configs)

      # We need to kill the old process that was using the default configs.
      processes.pgcat.stop
      processes.pgcat.start
      processes.pgcat.wait_until_ready

      # tcp_keepalives_idle = 120s should show up as a ~2 minute timer.
      established_socket_rows.each do |row|
        expect(row).to include("timer:(keepalive,1min")
      end
    end
  end
end
|
|
|
|
|
|
2022-08-30 11:14:53 -05:00
|
|
|
describe "Extended Protocol handling" do
  it "does not send packets that client does not expect during extended protocol sequence" do
    new_configs = processes.pgcat.current_config
    new_configs["general"].merge!(
      "connect_timeout" => 500,
      "ban_time" => 1,
      "shutdown_timeout" => 1
    )
    new_configs["pools"]["sharded_db"]["users"]["0"]["pool_size"] = 1
    processes.pgcat.update_config(new_configs)
    processes.pgcat.reload_config

    # Saturate the single-connection pool with long-running queries so the
    # connection under test keeps hitting checkout errors.
    # NOTE(review): `expr rescue PG::SystemError` is a rescue *modifier* — it
    # swallows any StandardError; the constant is just the value on error.
    25.times do
      Thread.new do
        conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
        conn.async_exec("SELECT pg_sleep(5)") rescue PG::SystemError
      ensure
        conn&.close
      end
    end

    sleep(0.5)
    conn_under_test = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))

    stdout, stderr = with_captured_stdout_stderr do
      15.times do |i|
        # Interleave simple and extended protocol queries while checkout
        # errors are likely.
        conn_under_test.async_exec("SELECT 1") rescue PG::SystemError
        conn_under_test.exec_params("SELECT #{i} + $1", [i]) rescue PG::SystemError
        sleep 1
      end
    end

    # libpq prints this warning to stderr when the server sends a message the
    # client was not expecting while idle.
    raise StandardError, "Libpq got unexpected messages while idle" if stderr.include?("arrived from server while idle")
  end
end
|
|
|
|
|
|
|
|
|
|
describe "Pool recycling after config reload" do
  let(:processes) { Helpers::Pgcat.three_shard_setup("sharded_db", 5) }

  it "should update pools for new clients and clients that are no longer in transaction" do
    # Returns the database name the given connection is actually attached to.
    current_database = ->(conn) { conn.async_exec("SELECT current_database();")[0]["current_database"] }

    server_conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    server_conn.async_exec("BEGIN")

    # No config change yet, client should see the old configs.
    expect(current_database.call(server_conn)).to eq('shard0')

    # Swap shards 0 and 1 in the config.
    shards = processes.pgcat.current_config.then do |new_config|
      @new_config = new_config
      new_config["pools"]["sharded_db"]["shards"]
    end
    shards["0"], shards["1"] = shards["1"], shards["0"]

    # Reload config.
    processes.pgcat.update_config(@new_config)
    processes.pgcat.reload_config
    sleep 0.5

    # Config changed but the transaction is still open, so the client keeps
    # its old server connection.
    expect(current_database.call(server_conn)).to eq('shard0')
    server_conn.async_exec("COMMIT")

    # Transaction finished, client should get the new configs.
    expect(current_database.call(server_conn)).to eq('shard1')

    # A brand new connection should also get the new configs.
    server_conn.close
    server_conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    expect(current_database.call(server_conn)).to eq('shard1')
  end
end
|
|
|
|
|
|
|
|
|
|
describe "Clients closing connection in the middle of transaction" do
  it "sends a rollback to the server" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    ["SET SERVER ROLE to 'primary'", "BEGIN"].each { |sql| conn.async_exec(sql) }
    conn.close

    # The pooler must roll back the abandoned transaction before the server
    # connection can be reused.
    expect(processes.primary.count_query("ROLLBACK")).to eq(1)
  end
end
|
|
|
|
|
|
2025-02-27 13:17:00 -06:00
|
|
|
describe "Checkout failure limit" do
  # Shrinks the pool to a single connection with a short connect_timeout so
  # checkout failures are easy to provoke, optionally sets a
  # checkout_failure_limit, then reloads pgcat.
  def apply_contended_pool_config(checkout_failure_limit: nil)
    new_configs = processes.pgcat.current_config
    new_configs["general"]["connect_timeout"] = 200
    new_configs["pools"]["sharded_db"]["users"]["0"]["pool_size"] = 1
    unless checkout_failure_limit.nil?
      new_configs["pools"]["sharded_db"]["checkout_failure_limit"] = checkout_failure_limit
    end
    processes.pgcat.update_config(new_configs)
    processes.pgcat.reload_config
    sleep 0.5
  end

  # Runs 5 threads x 5 queries against the over-subscribed pool. Checkout
  # timeouts surface as PG::SystemError but must not kill the client session.
  def run_contended_queries
    Array.new(5) do
      Thread.new do
        conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
        5.times do
          begin
            conn.async_exec("SELECT pg_sleep(0.5);")
            expect(conn.status).to eq(PG::CONNECTION_OK)
          rescue PG::SystemError
            # Checkout failed, but the client connection must stay open.
            expect(conn.status).to eq(PG::CONNECTION_OK)
          end
        end
        conn.close
      end
    end.each(&:join)
  end

  context "when no checkout failure limit is set" do
    before do
      apply_contended_pool_config
    end

    it "does not disconnect client" do
      run_contended_queries
    end
  end

  context "when checkout failure limit is set high" do
    before do
      apply_contended_pool_config(checkout_failure_limit: 10000)
    end

    it "does not disconnect client" do
      run_contended_queries
    end
  end

  context "when checkout failure limit is set low" do
    before do
      apply_contended_pool_config(checkout_failure_limit: 2)
    end

    it "disconnects client after reaching limit" do
      Array.new(5) do
        Thread.new do
          conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
          checkout_failure_count = 0
          5.times do
            begin
              conn.async_exec("SELECT pg_sleep(1);")
              expect(conn.status).to eq(PG::CONNECTION_OK)
            rescue PG::SystemError
              # A checkout failure below the limit keeps the session alive.
              checkout_failure_count += 1
              expect(conn.status).to eq(PG::CONNECTION_OK)
            rescue PG::ConnectionBad
              # The pooler disconnects the client once the configured number
              # of checkout failures (2) has been reached.
              expect(checkout_failure_count).to eq(2)
              expect(conn.status).to eq(PG::CONNECTION_BAD)
              break
            end
          end
          conn.close
        end
      end.each(&:join)
      # Dump pooler logs to aid debugging when this example fails in CI.
      puts processes.pgcat.logs
    end
  end
end
|
|
|
|
|
|
2022-08-30 11:14:53 -05:00
|
|
|
describe "Server version reporting" do
  it "reports correct version for normal and admin databases" do
    # Both a regular pool connection and the admin console must report a
    # non-zero server_version during startup.
    connection_strings = [
      processes.pgcat.connection_string("sharded_db", "sharding_user"),
      processes.pgcat.admin_connection_string
    ]

    connection_strings.each do |connection_string|
      conn = PG::connect(connection_string)
      expect(conn.server_version).not_to eq(0)
      conn.close
    end
  end
end
|
2022-09-01 22:06:55 -05:00
|
|
|
|
|
|
|
|
describe "State clearance" do
|
|
|
|
|
context "session mode" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 5, "session") }

  it "Clears state before connection checkin" do
    # Both modes of operation should not raise
    # ERROR: prepared statement "prepared_q" already exists
    15.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("PREPARE prepared_q (int) AS SELECT $1")
      conn.close
    end

    # A parameter set on this session must be visible on the same session.
    # (Removed two locals that captured SHOW output but were never used.)
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    conn.async_exec("SET statement_timeout to 1000")
    expect(conn.async_exec("SHOW statement_timeout")[0]["statement_timeout"]).to eq("1s")
    conn.close
  end

  it "Does not send RESET ALL unless necessary" do
    # Clients that never changed a parameter should not trigger RESET ALL on
    # checkin.
    10.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("SET SERVER ROLE to 'primary'")
      conn.async_exec("SELECT 1")
      conn.close
    end

    expect(processes.primary.count_query("RESET ALL")).to eq(0)

    # Clients that did change a parameter get one RESET ALL each.
    10.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("SET SERVER ROLE to 'primary'")
      conn.async_exec("SELECT 1")
      conn.async_exec("SET statement_timeout to 5000")
      conn.close
    end

    expect(processes.primary.count_query("RESET ALL")).to eq(10)
  end

  it "Resets server roles correctly" do
    # Each client that set a server role should produce one RESET ROLE when
    # its server connection is checked back in.
    10.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("SET SERVER ROLE to 'primary'")
      conn.async_exec("SELECT 1")
      conn.async_exec("SET statement_timeout to 5000")
      conn.close
    end

    expect(processes.primary.count_query("RESET ROLE")).to eq(10)
  end
end
|
|
|
|
|
|
|
|
|
|
context "transaction mode" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 5, "transaction") }

  it "Clears state before connection checkin" do
    # Both modes of operation should not raise
    # ERROR: prepared statement "prepared_q" already exists
    15.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("PREPARE prepared_q (int) AS SELECT $1")
      conn.close
    end

    # Same check through the extended protocol prepare path.
    15.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.prepare("prepared_q", "SELECT $1")
      conn.close
    end
  end

  it "Does not send RESET ALL unless necessary" do
    # Simple and extended protocol queries without any SET should not trigger
    # RESET ALL on checkin.
    10.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("SET SERVER ROLE to 'primary'")
      conn.async_exec("SELECT 1")
      conn.exec_params("SELECT $1", [1])
      conn.close
    end

    expect(processes.primary.count_query("RESET ALL")).to eq(0)

    # Clients that changed a parameter get one RESET ALL each.
    10.times do
      conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
      conn.async_exec("SET SERVER ROLE to 'primary'")
      conn.async_exec("SELECT 1")
      conn.async_exec("SET statement_timeout to 5000")
      conn.close
    end

    expect(processes.primary.count_query("RESET ALL")).to eq(10)
  end

  it "Respects tracked parameters on startup" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user", parameters: { "application_name" => "my_pgcat_test" }))

    expect(conn.async_exec("SHOW application_name")[0]["application_name"]).to eq("my_pgcat_test")
    conn.close
  end

  # Description typo fixed: "set statemet" -> "SET statement".
  it "Respects tracked parameter on SET statement" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))

    conn.async_exec("SET application_name to 'my_pgcat_test'")
    expect(conn.async_exec("SHOW application_name")[0]["application_name"]).to eq("my_pgcat_test")
  end

  # Description typo fixed: "set statemet" -> "SET statement".
  it "Ignores untracked parameter on SET statement" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    original_statement_timeout = conn.async_exec("SHOW statement_timeout")[0]["statement_timeout"]

    # statement_timeout is not a tracked parameter in transaction mode, so the
    # SET must not stick across pooled server connections.
    conn.async_exec("SET statement_timeout to 1500")
    expect(conn.async_exec("SHOW statement_timeout")[0]["statement_timeout"]).to eq(original_statement_timeout)
  end
end
|
2023-02-08 11:35:38 -06:00
|
|
|
|
2022-10-13 22:33:12 -04:00
|
|
|
context "transaction mode with transactions" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 5, "transaction") }

  it "Does not clear set statement state when declared in a transaction" do
    # A SET issued inside an explicit transaction must not cause pgcat to
    # issue RESET ALL on checkin, with or without the LOCAL modifier.
    [
      "SET statement_timeout to 1000",
      "SET LOCAL statement_timeout to 1000"
    ].each do |set_statement|
      10.times do
        conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
        conn.async_exec("SET SERVER ROLE to 'primary'")
        conn.async_exec("BEGIN")
        conn.async_exec(set_statement)
        conn.async_exec("COMMIT")
        conn.close
      end

      expect(processes.primary.count_query("RESET ALL")).to eq(0)
    end
  end
end
|
2023-05-18 10:46:55 -07:00
|
|
|
|
|
|
|
|
context "server cleanup disabled" do
  let(:processes) { Helpers::Pgcat.single_shard_setup("sharded_db", 1, "transaction", "random", "info", { "cleanup_server_connections" => false }) }

  it "will not clean up connection state" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    processes.primary.reset_stats
    conn.async_exec("SET statement_timeout TO 1000")
    conn.close

    # With cleanup_server_connections = false, checkin must not issue
    # RESET ALL even though the client changed a parameter.
    expect(processes.primary.count_query("RESET ALL")).to eq(0)
  end

  it "will not clean up prepared statements" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    processes.primary.reset_stats
    conn.async_exec("PREPARE prepared_q (int) AS SELECT $1")
    conn.close

    # Same expectation after creating a prepared statement: no cleanup query.
    expect(processes.primary.count_query("RESET ALL")).to eq(0)
  end
end
|
2022-09-01 22:06:55 -05:00
|
|
|
end
|
2023-03-24 11:20:30 -04:00
|
|
|
|
|
|
|
|
describe "Idle client timeout" do
|
|
|
|
|
context "idle transaction timeout set to 0" do
  before do
    # A timeout of 0 disables the idle-in-transaction client timeout.
    # (Removed a local that captured the previous timeout but was never used.)
    current_configs = processes.pgcat.current_config
    current_configs["general"]["idle_client_in_transaction_timeout"] = 0
    processes.pgcat.update_config(current_configs) # with timeout 0
    processes.pgcat.reload_config
  end

  it "Allow client to be idle in transaction" do
    conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
    conn.async_exec("BEGIN")
    conn.async_exec("SELECT 1")
    # Stay idle inside the open transaction; with the timeout disabled the
    # COMMIT below must still succeed.
    sleep(2)
    conn.async_exec("COMMIT")
    conn.close
  end
end
|
|
|
|
|
|
|
|
|
|
context "idle transaction timeout set to 500ms" do
|
|
|
|
|
before do
|
|
|
|
|
current_configs = processes.pgcat.current_config
|
Allow configuring routing decision when no shard is selected (#578)
The TL;DR for the change is that we allow QueryRouter to set the active shard to None. This signals to the Pool::get method that we have no shard selected. The get method follows a no_shard_specified_behavior config to know how to route the query.
Original PR description
Ruby-pg library makes a startup query to SET client_encoding to ... if Encoding.default_internal value is set (Code). This query is troublesome because we cannot possibly attach a routing comment to it. PgCat, by default, will route that query to the default shard.
Everything is fine until shard 0 has issues, Clients will all be attempting to send this query to shard0 which increases the connection latency significantly for all clients, even those not interested in shard0
This PR introduces no_shard_specified_behavior that defines the behavior in case we have routing-by-comment enabled but we get a query without a comment. The allowed behaviors are
random: Picks a shard at random
random_healthy: Picks a shard at random favoring shards with the least number of recent connection/checkout errors
shard_<number>: e.g. shard_0, shard_4, etc. picks a specific shard, everytime
In order to achieve this, this PR introduces an error_count on the Address Object that tracks the number of errors since the last checkout and uses that metric to sort shards by error count before making a routing decision.
I didn't want to use address stats to avoid introducing a routing dependency on internal stats. (We might do that in the future but I prefer to avoid this for the time being.)
I also made changes to the test environment to replace Ruby's TOML reader library. It appears to be abandoned and does not support mixed arrays (which we use in the config toml), and it also does not play nicely with single-quoted regular expressions. I opted for using yj, which is a CLI tool that can convert from TOML to JSON and back, so I refactored the tests to use that tool.
2023-09-11 13:47:28 -05:00
|
|
|
correct_idle_client_transaction_timeout = current_configs["general"]["idle_client_in_transaction_timeout"]
|
2023-03-24 11:20:30 -04:00
|
|
|
current_configs["general"]["idle_client_in_transaction_timeout"] = 500
|
Allow configuring routing decision when no shard is selected (#578)
The TL;DR for the change is that we allow QueryRouter to set the active shard to None. This signals to the Pool::get method that we have no shard selected. The get method follows a no_shard_specified_behavior config to know how to route the query.
Original PR description
Ruby-pg library makes a startup query to SET client_encoding to ... if Encoding.default_internal value is set (Code). This query is troublesome because we cannot possibly attach a routing comment to it. PgCat, by default, will route that query to the default shard.
Everything is fine until shard 0 has issues, Clients will all be attempting to send this query to shard0 which increases the connection latency significantly for all clients, even those not interested in shard0
This PR introduces no_shard_specified_behavior that defines the behavior in case we have routing-by-comment enabled but we get a query without a comment. The allowed behaviors are
random: Picks a shard at random
random_healthy: Picks a shard at random favoring shards with the least number of recent connection/checkout errors
shard_<number>: e.g. shard_0, shard_4, etc. picks a specific shard, everytime
In order to achieve this, this PR introduces an error_count on the Address Object that tracks the number of errors since the last checkout and uses that metric to sort shards by error count before making a routing decision.
I didn't want to use address stats to avoid introducing a routing dependency on internal stats (We might do that in the future but I prefer to avoid this for the time being.
I also made changes to the test environment to replace Ruby's TOML reader library, It appears to be abandoned and does not support mixed arrays (which we use in the config toml), and it also does not play nicely with single-quoted regular expressions. I opted for using yj which is a CLI tool that can convert from toml to JSON and back. So I refactor the tests to use that library.
2023-09-11 13:47:28 -05:00
|
|
|
|
2023-03-24 11:20:30 -04:00
|
|
|
processes.pgcat.update_config(current_configs) # with timeout 500
|
|
|
|
|
processes.pgcat.reload_config
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
it "Allow client to be idle in transaction below timeout" do
|
|
|
|
|
conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
|
|
|
|
|
conn.async_exec("BEGIN")
|
|
|
|
|
conn.async_exec("SELECT 1")
|
|
|
|
|
sleep(0.4) # below 500ms
|
|
|
|
|
conn.async_exec("COMMIT")
|
|
|
|
|
conn.close
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
it "Error when client idle in transaction time exceeds timeout" do
|
|
|
|
|
conn = PG::connect(processes.pgcat.connection_string("sharded_db", "sharding_user"))
|
|
|
|
|
conn.async_exec("BEGIN")
|
|
|
|
|
conn.async_exec("SELECT 1")
|
|
|
|
|
sleep(1) # above 500ms
|
Allow configuring routing decision when no shard is selected (#578)
The TL;DR for the change is that we allow QueryRouter to set the active shard to None. This signals to the Pool::get method that we have no shard selected. The get method follows a no_shard_specified_behavior config to know how to route the query.
Original PR description
Ruby-pg library makes a startup query to SET client_encoding to ... if Encoding.default_internal value is set (Code). This query is troublesome because we cannot possibly attach a routing comment to it. PgCat, by default, will route that query to the default shard.
Everything is fine until shard 0 has issues, Clients will all be attempting to send this query to shard0 which increases the connection latency significantly for all clients, even those not interested in shard0
This PR introduces no_shard_specified_behavior that defines the behavior in case we have routing-by-comment enabled but we get a query without a comment. The allowed behaviors are
random: Picks a shard at random
random_healthy: Picks a shard at random favoring shards with the least number of recent connection/checkout errors
shard_<number>: e.g. shard_0, shard_4, etc. picks a specific shard, everytime
In order to achieve this, this PR introduces an error_count on the Address Object that tracks the number of errors since the last checkout and uses that metric to sort shards by error count before making a routing decision.
I didn't want to use address stats to avoid introducing a routing dependency on internal stats (We might do that in the future but I prefer to avoid this for the time being.
I also made changes to the test environment to replace Ruby's TOML reader library, It appears to be abandoned and does not support mixed arrays (which we use in the config toml), and it also does not play nicely with single-quoted regular expressions. I opted for using yj which is a CLI tool that can convert from toml to JSON and back. So I refactor the tests to use that library.
2023-09-11 13:47:28 -05:00
|
|
|
expect{ conn.async_exec("COMMIT") }.to raise_error(PG::SystemError, /idle transaction timeout/)
|
2023-03-24 11:20:30 -04:00
|
|
|
conn.async_exec("SELECT 1") # should be able to send another query
|
|
|
|
|
conn.close
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
end
|
2022-08-30 11:14:53 -05:00
|
|
|
end
|