[Tarantool-patches] [PATCH 4/4] election: activate raft split vote handling

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Jan 15 03:48:56 MSK 2022


Raft needs to know the cluster size in order to detect and handle
a split vote. The patch uses the registered server count as the
cluster size.

The change is neither documented nor given a changelog file because
it is an optimization: it can't be observed except in logs or with a
watch.

Closes #5285
---
 src/box/raft.c                                |  4 +-
 .../election_split_vote_test.lua              | 92 +++++++++++++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 test/replication-luatest/election_split_vote_test.lua

diff --git a/src/box/raft.c b/src/box/raft.c
index 1e360dc88..1908b71b6 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -229,7 +229,9 @@ box_raft_update_election_quorum(void)
 	 *   be lost.
 	 */
 	int quorum = MIN(replication_synchro_quorum, max);
-	raft_cfg_election_quorum(box_raft(), quorum);
+	struct raft *raft = box_raft();
+	raft_cfg_election_quorum(raft, quorum);
+	raft_cfg_cluster_size(raft, replicaset.registered_count);
 }
 
 void
diff --git a/test/replication-luatest/election_split_vote_test.lua b/test/replication-luatest/election_split_vote_test.lua
new file mode 100644
index 000000000..f31bfd7f3
--- /dev/null
+++ b/test/replication-luatest/election_split_vote_test.lua
@@ -0,0 +1,92 @@
+local t = require('luatest')
+local cluster = require('test.luatest_helpers.cluster')
+local helpers = require('test.luatest_helpers')
+local wait_timeout = 120
+
+--
+-- gh-5285: a split vote is when no node can win the leader role in the current
+-- term: the number of unused votes is not enough for anyone to reach the
+-- quorum. It can be detected to speed up the term bump.
+--
+local g = t.group('split-vote')
+
+g.before_each(function()
+    g.cluster = cluster:new({})
+    local node1_uri = helpers.instance_uri('node1')
+    local node2_uri = helpers.instance_uri('node2')
+    local replication = {node1_uri, node2_uri}
+    local box_cfg = {
+        listen = node1_uri,
+        replication = replication,
+        -- To speed up a new term when trying to elect the first leader.
+        replication_timeout = 0.1,
+        replication_synchro_quorum = 2,
+        election_timeout = 1000000,
+    }
+    g.node1 = g.cluster:build_server({alias = 'node1', box_cfg = box_cfg})
+
+    box_cfg.listen = node2_uri
+    g.node2 = g.cluster:build_server({alias = 'node2', box_cfg = box_cfg})
+
+    g.cluster:add_server(g.node1)
+    g.cluster:add_server(g.node2)
+    g.cluster:start()
+end)
+
+g.after_each(function()
+    g.cluster:drop()
+end)
+
+g.test_split_vote = function(g)
+    -- Stop the replication so the nodes can't request votes from each other.
+    local node1_repl = g.node1:exec(function()
+        local repl = box.cfg.replication
+        box.cfg{replication = {}}
+        return repl
+    end)
+    local node2_repl = g.node2:exec(function()
+        local repl = box.cfg.replication
+        box.cfg{replication = {}}
+        return repl
+    end)
+
+    -- Both vote for self but don't see the split-vote yet.
+    g.node1:exec(function()
+        box.cfg{election_mode = 'candidate'}
+    end)
+    g.node2:exec(function()
+        box.cfg{election_mode = 'candidate'}
+    end)
+
+    -- Wait for the votes to actually happen.
+    t.helpers.retrying({timeout = wait_timeout}, function()
+        local func = function()
+            return box.info.election.vote == box.info.id
+        end
+        assert(g.node1:exec(func))
+        assert(g.node2:exec(func))
+    end)
+
+    -- Now let the nodes notice the split vote.
+    g.node1:exec(function(repl)
+        box.cfg{replication = repl}
+    end, {node1_repl})
+    g.node2:exec(function(repl)
+        box.cfg{replication = repl}
+    end, {node2_repl})
+
+    t.helpers.retrying({timeout = wait_timeout}, function()
+        local msg = 'split vote is discovered'
+        assert(g.node1:grep_log(msg) or g.node2:grep_log(msg))
+    end)
+
+    -- Ensure a leader is eventually elected. Nothing is broken for good.
+    g.node1:exec(function()
+        box.cfg{election_timeout = 1}
+    end)
+    g.node2:exec(function()
+        box.cfg{election_timeout = 1}
+    end)
+    g.node1:wait_election_leader_found()
+    g.node2:wait_election_leader_found()
+end
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list