[Tarantool-patches] [PATCH] raft: add a test with synchronous replication

Serge Petrenko sergepetrenko at tarantool.org
Fri Oct 2 13:33:12 MSK 2020


---
Branch: https://github.com/tarantool/tarantool/tree/sp/raft-qsync-test

The test is relatively long (it runs for about 10 seconds on my machine),
but I still think it's worth having, at least under the --long option (I haven't
added it to the long_run list yet).

 test/replication/election_qsync.result   | 125 +++++++++++++++++++++++
 test/replication/election_qsync.test.lua |  70 +++++++++++++
 test/replication/election_replica.lua    |  10 +-
 3 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 test/replication/election_qsync.result
 create mode 100644 test/replication/election_qsync.test.lua

diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
new file mode 100644
index 000000000..1bf13d7bc
--- /dev/null
+++ b/test/replication/election_qsync.result
@@ -0,0 +1,125 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+ | ---
+ | ...
+
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+ | ---
+ | ...
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+
+nrs = {true, true, true}
+ | ---
+ | ...
+old_leader_nr = get_leader(nrs)
+ | ---
+ | ...
+old_leader = 'election_replica'..old_leader_nr
+ | ---
+ | ...
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+ | ---
+ | ...
+c = netbox.connect(leader_port)
+ | ---
+ | ...
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+ | ---
+ | ...
+_ = c:eval('box.space.test:create_index("pk")')
+ | ---
+ | ...
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+-- We're connected to some leader.
+c.space.test:select{}
+ | ---
+ | - - [1]
+ |   - [2]
+ |   - [3]
+ |   - [4]
+ |   - [5]
+ |   - [6]
+ |   - [7]
+ |   - [8]
+ |   - [9]
+ |   - [10]
+ | ...
+
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
new file mode 100644
index 000000000..f069c71bb
--- /dev/null
+++ b/test/replication/election_qsync.test.lua
@@ -0,0 +1,70 @@
+test_run = require('test_run').new()
+netbox = require('net.box')
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+
+test_run:cmd('setopt delimiter ""');
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:wait_fullmesh(SERVERS)
+
+nrs = {true, true, true}
+old_leader_nr = get_leader(nrs)
+old_leader = 'election_replica'..old_leader_nr
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+c = netbox.connect(leader_port)
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+_ = c:eval('box.space.test:create_index("pk")')
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+test_run:cmd('setopt delimiter ""');
+-- We're connected to some leader.
+c.space.test:select{}
+
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
index 36ea1f077..887d8a2a0 100644
--- a/test/replication/election_replica.lua
+++ b/test/replication/election_replica.lua
@@ -2,9 +2,10 @@
 
 local INSTANCE_ID = string.match(arg[0], "%d")
 local SOCKET_DIR = require('fio').cwd()
+local SYNCHRO_QUORUM = arg[1] and tonumber(arg[1]) or 3
 
 local function instance_uri(instance_id)
-    return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock';
+    return SOCKET_DIR..'/election_replica'..instance_id..'.sock';
 end
 
 require('console').listen(os.getenv('ADMIN'))
@@ -19,8 +20,11 @@ box.cfg({
     replication_timeout = 0.1,
     election_is_enabled = true,
     election_is_candidate = true,
-    election_timeout = 0.1,
-    replication_synchro_quorum = 3,
+    -- Should be at least as big as replication_disconnect_timeout, which is
+    -- 4 * replication_timeout.
+    election_timeout = 0.4,
+    replication_synchro_quorum = SYNCHRO_QUORUM,
+    replication_synchro_timeout = 0.1,
     -- To reveal more election logs.
     log_level = 6,
 })
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list