[patches] [PATCH vshard 5/5] router: get rid of replicaset.candidate

Fri Mar 2 00:39:41 MSK 2018

After #76 is closed, a router holds connections to all replicas,
including slaves. Replicaset.candidate was used by the failover
algorithm to try to connect to replicas with a higher priority
than the current replica has. But now all replicas already have
connections, and the candidate is not needed anymore.

Follow up #76

Signed-off-by: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
---
 test/failover/failover.result   |  67 +++++---------------------
 test/failover/failover.test.lua |  29 ++----------
 vshard/replicaset.lua           | 101 +++++++++++-----------------------------
 vshard/router/init.lua          |  46 +++---------------
 4 files changed, 48 insertions(+), 195 deletions(-)

diff --git a/test/failover/failover.result b/test/failover/failover.result
index 1a94f63..73a4250 100644
--- a/test/failover/failover.result
+++ b/test/failover/failover.result
@@ -177,6 +177,9 @@ test_run:switch('router_1')
 rs1 = vshard.router.internal.replicasets[rs_uuid[1]]
 ---
 ...
+while not rs1.replica_up_ts do fiber.sleep(0.1) end
+---
+...
 old_up_ts = rs1.replica_up_ts
 ---
 ...
@@ -252,37 +255,24 @@ test_run:switch('router_1')
 ---
 - true
 ...
--- Once per FAILOVER_UP_TIMEOUT a router tries to reconnect to the
--- best replica.
-while rs1.candidate == nil or rs1.candidate.down_ts == nil do fiber.sleep(0.1) end
----
-...
--- Check that the original replica is used, while a candidate
--- tries to connnect.
-vshard.router.call(1, 'read', 'echo', {123})
----
-- 123
-...
-test_run:switch('box_1_b')
+-- Revive the best replica. A router must reconnect to it in
+-- FAILOVER_UP_TIMEOUT seconds.
+test_run:cmd('start server box_1_d')
 ---
 - true
 ...
-echo_count
+ts1 = fiber.time()
 ---
-- 2
 ...
-test_run:switch('router_1')
+while rs1.replica.name ~= 'box_1_d' do fiber.sleep(0.1) end
 ---
-- true
 ...
--- Revive the best replica. A router must reconnect to it in
--- FAILOVER_UP_TIMEOUT seconds.
-test_run:cmd('start server box_1_d')
+ts2 = fiber.time()
 ---
-- true
 ...
-while rs1.replica.name ~= 'box_1_d' do fiber.sleep(0.1) end
+ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
 ---
+- true
 ...
 test_run:grep_log('router_1', 'New replica box_1_d%(storage%@')
 ---
@@ -309,41 +299,6 @@ rs1.replica.conn == rs1.master.conn
 ---
 - true
 ...
---
--- Ensure the candidate downs its priority until mets a current
--- replica.
---
-while not rs1.candidate or rs1.candidate.name ~= 'box_1_d' do fiber.sleep(0.1) end
----
-...
-ts1 = fiber.time()
----
-...
-while rs1.candidate.name ~= 'box_1_b' do fiber.sleep(0.1) end
----
-...
-ts2 = fiber.time()
----
-...
--- Ensure the candidate is updated more often than once per
--- UP_TIMEOUT seconds.
-ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
----
-- true
-...
-ts1 = ts2
----
-...
-while rs1.candidate.name ~= 'box_1_c' do fiber.sleep(0.1) end
----
-...
-ts2 = fiber.time()
----
-...
-ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
----
-- true
-...
 test_run:cmd('start server box_1_b with wait=False, wait_load=False')
 ---
 - true
diff --git a/test/failover/failover.test.lua b/test/failover/failover.test.lua
index bee1216..6e06314 100644
--- a/test/failover/failover.test.lua
+++ b/test/failover/failover.test.lua
@@ -75,6 +75,7 @@ echo_count
 -- Ensure that replica_up_ts is updated periodically.
 test_run:switch('router_1')
 rs1 = vshard.router.internal.replicasets[rs_uuid[1]]
+while not rs1.replica_up_ts do fiber.sleep(0.1) end
 old_up_ts = rs1.replica_up_ts
 while rs1.replica_up_ts == old_up_ts do fiber.sleep(0.1) end
 rs1.replica_up_ts - old_up_ts >= vshard.consts.FAILOVER_UP_TIMEOUT
@@ -105,20 +106,13 @@ test_run:cmd('switch box_1_b')
 echo_count
 test_run:switch('router_1')
 
--- Once per FAILOVER_UP_TIMEOUT a router tries to reconnect to the
--- best replica.
-while rs1.candidate == nil or rs1.candidate.down_ts == nil do fiber.sleep(0.1) end
--- Check that the original replica is used, while a candidate
--- tries to connnect.
-vshard.router.call(1, 'read', 'echo', {123})
-test_run:switch('box_1_b')
-echo_count
-test_run:switch('router_1')
-
 -- Revive the best replica. A router must reconnect to it in
 -- FAILOVER_UP_TIMEOUT seconds.
 test_run:cmd('start server box_1_d')
+ts1 = fiber.time()
 while rs1.replica.name ~= 'box_1_d' do fiber.sleep(0.1) end
+ts2 = fiber.time()
+ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
 test_run:grep_log('router_1', 'New replica box_1_d%(storage%@')
 
 -- Ensure the master connection is used as replica's one instead
@@ -128,21 +122,6 @@ test_run:cmd('stop server box_1_c')
 test_run:cmd('stop server box_1_d')
 while rs1.replica.name ~= 'box_1_a' do fiber.sleep(0.1) end
 rs1.replica.conn == rs1.master.conn
---
--- Ensure the candidate downs its priority until mets a current
--- replica.
---
-while not rs1.candidate or rs1.candidate.name ~= 'box_1_d' do fiber.sleep(0.1) end
-ts1 = fiber.time()
-while rs1.candidate.name ~= 'box_1_b' do fiber.sleep(0.1) end
-ts2 = fiber.time()
--- Ensure the candidate is updated more often than once per
--- UP_TIMEOUT seconds.
-ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
-ts1 = ts2
-while rs1.candidate.name ~= 'box_1_c' do fiber.sleep(0.1) end
-ts2 = fiber.time()
-ts2 - ts1 < vshard.consts.FAILOVER_UP_TIMEOUT
 
 test_run:cmd('start server box_1_b with wait=False, wait_load=False')
 test_run:cmd('start server box_1_c with wait=False, wait_load=False')
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 56687be..e176945 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua
@@ -25,8 +25,6 @@
 --      },
 --      master = <master server from the array above>,
 --      replica = <nearest available replica object>,
---      candidate = <replica with less weight, which tries to
---                   connect and replace an original replica>,
 --      replica_up_ts = <timestamp updated on each attempt to
 --                       connect to the nearest replica, and on
 --                       each connect event>,
@@ -68,8 +66,7 @@ local function netbox_on_connect(conn)
         conn:close()
         return
     end
-    if (replica == rs.replica or replica == rs.candidate) and
-       replica == rs.priority_list[1] then
+    if replica == rs.replica and replica == rs.priority_list[1] then
         -- Update replica_up_ts, if the current replica has the
         -- biggest priority. Really, it is not neccessary to
         -- increase replica connection priority, if the current
@@ -131,60 +128,6 @@ local function replicaset_connect_all(replicaset)
     end
 end
 
---
--- Make a replica be used for read requests or be candidate.
--- @param replicaset Replicaset for which a replica is set.
--- @param replica Replica to be used for read requests.
--- @param read_name Either replica or candidate. Both of them can
---        be updated independently (@sa update_candidate(),
---        down_priority()).
---
-local function replicaset_make_replica_read(replicaset, replica, read_name)
-    assert(read_name == 'replica' or read_name == 'candidate')
-    assert(replicaset[read_name] ~= replica)
-    replicaset_connect_to_replica(replicaset, replica)
-    replicaset[read_name] = replica
-end
-
---
--- Try to connect to another candidate. There is two cases:
--- either
--- * it is time to reconnect to the nearest replica - choose
---   first replica in priority list, or
--- * current candidate can not connect to a server during
---   DOWN_TIMEOUT seconds - then the candidate is set to a next by
---   priority.
---
--- New connection is stored into candidate. It replaces an
--- original replica when connected.
---
-local function replicaset_update_candidate(replicaset)
-    local old_candidate = replicaset.candidate
-    local new_candidate
-    local curr_ts = fiber.time()
-    local up_ts = replicaset.replica_up_ts
-    if not old_candidate or not up_ts or
-       curr_ts - up_ts >= consts.FAILOVER_UP_TIMEOUT then
-        new_candidate = replicaset.priority_list[1]
-        -- Update timestamp of the last attempt to connect to the
-        -- best replica.
-        replicaset.replica_up_ts = curr_ts
-        -- It is possible, that the current replica already has
-        -- the best priority. In such a case there is no need
-        -- to create candidate.
-        if new_candidate == replicaset.replica or
-           new_candidate == old_candidate then
-            return
-        end
-    else
-        assert(old_candidate.next_by_priority and old_candidate.down_ts and
-               curr_ts - old_candidate.down_ts >= consts.FAILOVER_DOWN_TIMEOUT
-               and old_candidate.next_by_priority ~= replicaset.replica)
-        new_candidate = old_candidate.next_by_priority
-    end
-    replicaset_make_replica_read(replicaset, new_candidate, 'candidate')
-end
-
 --
 -- Connect to a next replica with less priority against a current
 -- one. It is needed, if a current replica's connection is down
@@ -194,27 +137,39 @@ local function replicaset_down_replica_priority(replicaset)
     local old_replica = replicaset.replica
     assert(old_replica and old_replica.down_ts and
            not old_replica:is_connected())
-    local new_replica = replicaset.replica.next_by_priority
+    local new_replica = old_replica.next_by_priority
     if new_replica then
-        replicaset_make_replica_read(replicaset, new_replica, 'replica')
+        assert(new_replica ~= old_replica)
+        replicaset_connect_to_replica(replicaset, new_replica)
+        replicaset.replica = new_replica
     end
     -- Else the current replica already has the lowest priority.
     -- Can not down it.
 end
 
 --
--- Set candidate as the current replica. Candidate attribute is
--- nullified and can be reused, for example, to try to connect to
--- the nearest replica.
+-- Search for a connected replica with a higher priority than the
+-- current replica has, and make it the current one.
 --
-local function replicaset_set_candidate_as_replica(replicaset)
-    assert(replicaset.candidate)
+local function replicaset_up_replica_priority(replicaset)
     local old_replica = replicaset.replica
-    replicaset.replica = replicaset.candidate
-    assert(not old_replica or
-           old_replica.weight >= replicaset.replica.weight and
-           old_replica ~= replicaset.replica)
-    replicaset.candidate = nil
+    if old_replica == replicaset.priority_list[1] and
+       old_replica:is_connected() then
+        replicaset.replica_up_ts = fiber.time()
+        return
+    end
+    for _, replica in pairs(replicaset.priority_list) do
+        if replica == old_replica then
+            -- Failed to up priority.
+            return
+        end
+        if replica:is_connected() then
+            replicaset.replica = replica
+            assert(not old_replica or
+                   old_replica.weight >= replicaset.replica.weight)
+            return
+        end
+    end
 end
 
 --
@@ -414,9 +369,8 @@ local replicaset_mt = {
         connect_all = replicaset_connect_all;
         connect_replica = replicaset_connect_to_replica;
         rebind_connections = replicaset_rebind_connections;
-        update_candidate = replicaset_update_candidate;
         down_replica_priority = replicaset_down_replica_priority;
-        set_candidate_as_replica = replicaset_set_candidate_as_replica;
+        up_replica_priority = replicaset_up_replica_priority;
         call = replicaset_master_call;
         callrw = replicaset_master_call;
         callro = replicaset_nearest_call;
@@ -580,9 +534,6 @@ local function destroy(replicasets)
         if rs.replica and rs.replica.conn then
             rs.replica.conn:close()
         end
-        if rs.candidate and rs.candidate.conn then
-            rs.candidate.conn:close()
-        end
     end
 end
 
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index d4d5aed..a57bd09 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -335,39 +335,12 @@ local function failover_need_down_priority(replicaset, curr_ts)
 end
 
 --
--- Replicaset must try to connect to a server with the highest
--- priority once per specified timeout. It allows to return to
--- the best server, if it was unavailable and has returned back.
--- And if the connection attempt was not successfull, then the
--- candidate must try a replica next by priority.
+-- Once per FAILOVER_UP_TIMEOUT a replicaset must try to switch
+-- to a connected replica with a higher priority.
 --
-local function failover_need_update_candidate(replicaset, curr_ts)
+local function failover_need_up_priority(replicaset, curr_ts)
     local up_ts = replicaset.replica_up_ts
-    -- First attempt to connect to replica.
-    if not up_ts then
-        return true
-    end
-    -- Try to reconnect to the best replica once per UP_TIMEOUT.
-    if curr_ts - up_ts >= consts.FAILOVER_UP_TIMEOUT then
-        return true
-    end
-    -- Candidate can not connect to a replica. Try next by
-    -- priority, if it is not current replica. Candidate always
-    -- must have weight <= current replica weight.
-    local candidate = replicaset.candidate
-    return candidate and
-           curr_ts - candidate.down_ts >= consts.FAILOVER_DOWN_TIMEOUT and
-           candidate.next_by_priority and
-           candidate.next_by_priority ~= replicaset.replica
-end
-
---
--- Check that a candidate is connected to its replica. In such a
--- case it becames new replica, because its weight <= current one.
---
-local function failover_is_candidate_connected(replicaset)
-    local candidate = replicaset.candidate
-    return candidate and candidate:is_connected()
+    return not up_ts or curr_ts - up_ts >= consts.FAILOVER_UP_TIMEOUT
 end
 
 --
@@ -379,8 +352,7 @@ local function failover_collect_to_update()
     local uuid_to_update = {}
     for uuid, rs in pairs(M.replicasets) do
         if failover_need_down_priority(rs, ts) or
-           failover_is_candidate_connected(rs) or
-           failover_need_update_candidate(rs, ts) then
+           failover_need_up_priority(rs, ts) then
             table.insert(uuid_to_update, uuid)
         end
     end
@@ -413,13 +385,10 @@ local function failover_step()
             return true
         end
         local old_replica = rs.replica
-        if failover_is_candidate_connected(rs) then
-            rs:set_candidate_as_replica()
+        if failover_need_up_priority(rs, curr_ts) then
+            rs:up_replica_priority()
             replica_is_changed = true
         end
-        if failover_need_update_candidate(rs, curr_ts) then
-            rs:update_candidate()
-        end
         if failover_need_down_priority(rs, curr_ts) then
             rs:down_replica_priority()
             replica_is_changed = true
@@ -519,7 +488,6 @@ local function router_cfg(cfg)
     -- connections and yield.
     for _, replicaset in pairs(new_replicasets) do
         replicaset:connect_all()
-        replicaset:update_candidate()
     end
     lreplicaset.wait_masters_connect(new_replicasets)
     if M.failover_fiber == nil then
-- 
2.14.3 (Apple Git-98)