[patches] [PATCH vshard 4/5] router: connect to all replicas including slaves
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Fri Mar 2 00:39:40 MSK 2018
There are two reasons. First: if the master of a replicaset
changes, a router must immediately switch all 'write' requests
to the new master. Currently, to do that, a router has to create
a new connection to the new master.
Second reason: #75. To quickly detect a master role change on a
replica, a router must be connected to all replicas of a
replicaset. Otherwise the router will spend too much time
establishing connections to slaves.
Closes #76
Signed-off-by: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
---
test/router/router.result | 11 ++++++++--
test/router/router.test.lua | 5 ++++-
vshard/replicaset.lua | 51 ++++++++++++++++-----------------------------
vshard/router/init.lua | 5 ++++-
4 files changed, 35 insertions(+), 37 deletions(-)
diff --git a/test/router/router.result b/test/router/router.result
index c7b3437..4bdb3bf 100644
--- a/test/router/router.result
+++ b/test/router/router.result
@@ -101,6 +101,9 @@ old_replicasets = vshard.router.internal.replicasets
old_connections = {}
---
...
+connection_count = 0
+---
+...
test_run:cmd("setopt delimiter ';'")
---
- true
@@ -108,6 +111,7 @@ test_run:cmd("setopt delimiter ';'")
for _, old_rs in pairs(old_replicasets) do
for uuid, old_replica in pairs(old_rs.replicas) do
old_connections[uuid] = old_replica.conn
+ connection_count = connection_count + 1
end
end;
---
@@ -116,6 +120,10 @@ test_run:cmd("setopt delimiter ''");
---
- true
...
+connection_count == 4
+---
+- true
+...
vshard.router.cfg(cfg)
---
...
@@ -389,9 +397,8 @@ rs.master = master
---
...
-- Test reconnect on bucker_route().
-rs:disconnect()
+master.conn:close()
---
-- true
...
conn = vshard.router.route(1):connect()
---
diff --git a/test/router/router.test.lua b/test/router/router.test.lua
index 5a51e2d..fcd54e1 100644
--- a/test/router/router.test.lua
+++ b/test/router/router.test.lua
@@ -41,13 +41,16 @@ rs2.replica == rs2.master
--
old_replicasets = vshard.router.internal.replicasets
old_connections = {}
+connection_count = 0
test_run:cmd("setopt delimiter ';'")
for _, old_rs in pairs(old_replicasets) do
for uuid, old_replica in pairs(old_rs.replicas) do
old_connections[uuid] = old_replica.conn
+ connection_count = connection_count + 1
end
end;
test_run:cmd("setopt delimiter ''");
+connection_count == 4
vshard.router.cfg(cfg)
new_replicasets = vshard.router.internal.replicasets
old_replicasets ~= new_replicasets
@@ -143,7 +146,7 @@ rs.master = nil
vshard.router.route(1).master
rs.master = master
-- Test reconnect on bucker_route().
-rs:disconnect()
+master.conn:close()
conn = vshard.router.route(1):connect()
conn:wait_connected()
conn.state
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 84799df..56687be 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua
@@ -113,7 +113,7 @@ end
--
-- Create net.box connection to master.
--
-local function replicaset_connect(replicaset)
+local function replicaset_connect_master(replicaset)
local master = replicaset.master
if master == nil then
return nil, lerror.vshard(lerror.code.MASTER_IS_MISSING,
@@ -122,6 +122,15 @@ local function replicaset_connect(replicaset)
return replicaset_connect_to_replica(replicaset, master)
end
+--
+-- Create net.box connections to all replicas and master.
+--
+local function replicaset_connect_all(replicaset)
+ for _, replica in pairs(replicaset.replicas) do
+ replicaset_connect_to_replica(replicaset, replica)
+ end
+end
+
--
-- Make a replica be used for read requests or be candidate.
-- @param replicaset Replicaset for which a replica is set.
@@ -132,17 +141,9 @@ end
--
local function replicaset_make_replica_read(replicaset, replica, read_name)
assert(read_name == 'replica' or read_name == 'candidate')
- local old_replica = replicaset[read_name]
- assert(old_replica ~= replica)
- local conn = replicaset_connect_to_replica(replicaset, replica)
+ assert(replicaset[read_name] ~= replica)
+ replicaset_connect_to_replica(replicaset, replica)
replicaset[read_name] = replica
- if old_replica and old_replica ~= replicaset.master then
- assert(conn ~= old_replica.conn)
- -- Each unused connection holds a worker fiber. Close them
- -- to return fibers in pool now. Do not wait lua gc - it
- -- is slow as fuck.
- old_replica.conn:close()
- end
end
--
@@ -214,24 +215,6 @@ local function replicaset_set_candidate_as_replica(replicaset)
old_replica.weight >= replicaset.replica.weight and
old_replica ~= replicaset.replica)
replicaset.candidate = nil
- if old_replica and old_replica.conn and
- old_replica ~= replicaset.master then
- old_replica.conn:close()
- end
-end
-
---
--- Destroy net.box connection to master
---
-local function replicaset_disconnect(replicaset)
- local master = replicaset.master
- if master == nil then
- return true
- end
- local conn = replicaset.master.conn
- replicaset.master.conn = nil
- conn:close()
- return true
end
--
@@ -320,7 +303,7 @@ local function replicaset_master_call(replicaset, func, args, opts)
assert(opts == nil or type(opts) == 'table')
assert(type(func) == 'string', 'function name')
assert(args == nil or type(args) == 'table', 'function arguments')
- replicaset_connect(replicaset)
+ replicaset_connect_master(replicaset)
local timeout = opts and opts.timeout or replicaset.master.net_timeout
local net_status, storage_status, retval, error_object =
replica_call(replicaset.master, func, args, timeout)
@@ -363,7 +346,7 @@ local function replicaset_nearest_call(replicaset, func, args, opts)
if replica and replica:is_connected() then
conn = replica.conn
else
- conn = replicaset_connect(replicaset)
+ conn = replicaset_connect_master(replicaset)
replica = replicaset.master
end
net_status, storage_status, retval, error_object =
@@ -426,12 +409,14 @@ end
--
local replicaset_mt = {
__index = {
- connect = replicaset_connect;
+ connect = replicaset_connect_master;
+ connect_master = replicaset_connect_master;
+ connect_all = replicaset_connect_all;
+ connect_replica = replicaset_connect_to_replica;
rebind_connections = replicaset_rebind_connections;
update_candidate = replicaset_update_candidate;
down_replica_priority = replicaset_down_replica_priority;
set_candidate_as_replica = replicaset_set_candidate_as_replica;
- disconnect = replicaset_disconnect;
call = replicaset_master_call;
callrw = replicaset_master_call;
callro = replicaset_nearest_call;
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index 6814637..d4d5aed 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -311,7 +311,10 @@ local function failover_ping_round()
if not replica.conn:ping({timeout = 5}) then
log.info('Ping error from %s: perhaps a connection is down',
replica)
+ -- Connection hangs. Recreate it to be able to
+ -- fail over to a replica next by priority.
replica.conn:close()
+ replicaset:connect_replica(replica)
end
end
end
@@ -515,7 +518,7 @@ local function router_cfg(cfg)
-- Now the new replicasets are fully built. Can establish
-- connections and yield.
for _, replicaset in pairs(new_replicasets) do
- replicaset:connect()
+ replicaset:connect_all()
replicaset:update_candidate()
end
lreplicaset.wait_masters_connect(new_replicasets)
--
2.14.3 (Apple Git-98)
More information about the Tarantool-patches
mailing list