[Tarantool-patches] [PATCH] raft: add a test with synchronous replication
Serge Petrenko
sergepetrenko at tarantool.org
Fri Oct 2 13:33:12 MSK 2020
---
Branch: https://github.com/tarantool/tarantool/tree/sp/raft-qsync-test
The test is relatively long (it runs for 10 seconds on my machine),
but I still think it's worth having, at least under the --long option (I haven't
added it to the long_run list yet).
test/replication/election_qsync.result | 125 +++++++++++++++++++++++
test/replication/election_qsync.test.lua | 70 +++++++++++++
test/replication/election_replica.lua | 10 +-
3 files changed, 202 insertions(+), 3 deletions(-)
create mode 100644 test/replication/election_qsync.result
create mode 100644 test/replication/election_qsync.test.lua
diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
new file mode 100644
index 000000000..1bf13d7bc
--- /dev/null
+++ b/test/replication/election_qsync.result
@@ -0,0 +1,125 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function get_leader(nrs)
+ local is_leader_cmd = 'return box.info.election.state == \'leader\''
+ local leader_nr = 0
+ test_run:wait_cond(function()
+ local leader_count = 0
+ for nr, do_check in pairs(nrs) do
+ if do_check then
+ local is_leader = test_run:eval('election_replica'..nr,
+ is_leader_cmd)[1]
+ if is_leader then
+ leader_count = leader_count + 1
+ leader_nr = nr
+ end
+ assert(leader_count <= 1)
+ end
+ end
+ return leader_count == 1
+ end)
+ return leader_nr
+end;
+ | ---
+ | ...
+
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+ | ---
+ | ...
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+
+nrs = {true, true, true}
+ | ---
+ | ...
+old_leader_nr = get_leader(nrs)
+ | ---
+ | ...
+old_leader = 'election_replica'..old_leader_nr
+ | ---
+ | ...
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+ | ---
+ | ...
+c = netbox.connect(leader_port)
+ | ---
+ | ...
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+ | ---
+ | ...
+_ = c:eval('box.space.test:create_index("pk")')
+ | ---
+ | ...
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+for i = 1,10 do
+ c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+ c.space.test:insert({i}, {is_async=true})
+ test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+ test_run:cmd('stop server '..old_leader)
+ nrs[old_leader_nr] = false
+ new_leader_nr = get_leader(nrs)
+ new_leader = 'election_replica'..new_leader_nr
+ leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+ c = netbox.connect(leader_port)
+ c:eval('box.ctl.clear_synchro_queue()')
+ c:eval('box.cfg{replication_synchro_timeout=1000}')
+ c.space._schema:replace{'smth'}
+ c.space.test:get{i}
+ test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+ nrs[old_leader_nr] = true
+ old_leader_nr = new_leader_nr
+ old_leader = new_leader
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+-- We're connected to some leader.
+c.space.test:select{}
+ | ---
+ | - - [1]
+ | - [2]
+ | - [3]
+ | - [4]
+ | - [5]
+ | - [6]
+ | - [7]
+ | - [8]
+ | - [9]
+ | - [10]
+ | ...
+
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
new file mode 100644
index 000000000..f069c71bb
--- /dev/null
+++ b/test/replication/election_qsync.test.lua
@@ -0,0 +1,70 @@
+test_run = require('test_run').new()
+netbox = require('net.box')
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+function get_leader(nrs)
+ local is_leader_cmd = 'return box.info.election.state == \'leader\''
+ local leader_nr = 0
+ test_run:wait_cond(function()
+ local leader_count = 0
+ for nr, do_check in pairs(nrs) do
+ if do_check then
+ local is_leader = test_run:eval('election_replica'..nr,
+ is_leader_cmd)[1]
+ if is_leader then
+ leader_count = leader_count + 1
+ leader_nr = nr
+ end
+ assert(leader_count <= 1)
+ end
+ end
+ return leader_count == 1
+ end)
+ return leader_nr
+end;
+
+test_run:cmd('setopt delimiter ""');
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:wait_fullmesh(SERVERS)
+
+nrs = {true, true, true}
+old_leader_nr = get_leader(nrs)
+old_leader = 'election_replica'..old_leader_nr
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+c = netbox.connect(leader_port)
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+_ = c:eval('box.space.test:create_index("pk")')
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+for i = 1,10 do
+ c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+ c.space.test:insert({i}, {is_async=true})
+ test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+ test_run:cmd('stop server '..old_leader)
+ nrs[old_leader_nr] = false
+ new_leader_nr = get_leader(nrs)
+ new_leader = 'election_replica'..new_leader_nr
+ leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+ c = netbox.connect(leader_port)
+ c:eval('box.ctl.clear_synchro_queue()')
+ c:eval('box.cfg{replication_synchro_timeout=1000}')
+ c.space._schema:replace{'smth'}
+ c.space.test:get{i}
+ test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+ nrs[old_leader_nr] = true
+ old_leader_nr = new_leader_nr
+ old_leader = new_leader
+end;
+test_run:cmd('setopt delimiter ""');
+-- We're connected to some leader.
+c.space.test:select{}
+
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
index 36ea1f077..887d8a2a0 100644
--- a/test/replication/election_replica.lua
+++ b/test/replication/election_replica.lua
@@ -2,9 +2,10 @@
local INSTANCE_ID = string.match(arg[0], "%d")
local SOCKET_DIR = require('fio').cwd()
+local SYNCHRO_QUORUM = arg[1] and tonumber(arg[1]) or 3
local function instance_uri(instance_id)
- return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock';
+ return SOCKET_DIR..'/election_replica'..instance_id..'.sock';
end
require('console').listen(os.getenv('ADMIN'))
@@ -19,8 +20,11 @@ box.cfg({
replication_timeout = 0.1,
election_is_enabled = true,
election_is_candidate = true,
- election_timeout = 0.1,
- replication_synchro_quorum = 3,
+ -- Should be at least as big as replication_disconnect_timeout, which is
+ -- 4 * replication_timeout.
+ election_timeout = 0.4,
+ replication_synchro_quorum = SYNCHRO_QUORUM,
+ replication_synchro_timeout = 0.1,
-- To reveal more election logs.
log_level = 6,
})
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list