Tarantool development patches archive
 help / color / mirror / Atom feed
From: Serge Petrenko <sergepetrenko@tarantool.org>
To: v.shpilevoy@tarantool.org
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
Date: Fri,  2 Oct 2020 13:33:12 +0300	[thread overview]
Message-ID: <20201002103312.23042-1-sergepetrenko@tarantool.org> (raw)

---
Branch: https://github.com/tarantool/tarantool/tree/sp/raft-qsync-test

The test is relatively long (it runs for 10 seconds on my machine),
but I still think it's worth having, at least under the --long option (I haven't
added it to the long_run list yet).

 test/replication/election_qsync.result   | 125 +++++++++++++++++++++++
 test/replication/election_qsync.test.lua |  70 +++++++++++++
 test/replication/election_replica.lua    |  10 +-
 3 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 test/replication/election_qsync.result
 create mode 100644 test/replication/election_qsync.test.lua

diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
new file mode 100644
index 000000000..1bf13d7bc
--- /dev/null
+++ b/test/replication/election_qsync.result
@@ -0,0 +1,125 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+ | ---
+ | ...
+
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+ | ---
+ | ...
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+
+nrs = {true, true, true}
+ | ---
+ | ...
+old_leader_nr = get_leader(nrs)
+ | ---
+ | ...
+old_leader = 'election_replica'..old_leader_nr
+ | ---
+ | ...
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+ | ---
+ | ...
+c = netbox.connect(leader_port)
+ | ---
+ | ...
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+ | ---
+ | ...
+_ = c:eval('box.space.test:create_index("pk")')
+ | ---
+ | ...
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+-- We're connected to some leader.
+c.space.test:select{}
+ | ---
+ | - - [1]
+ |   - [2]
+ |   - [3]
+ |   - [4]
+ |   - [5]
+ |   - [6]
+ |   - [7]
+ |   - [8]
+ |   - [9]
+ |   - [10]
+ | ...
+
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
new file mode 100644
index 000000000..f069c71bb
--- /dev/null
+++ b/test/replication/election_qsync.test.lua
@@ -0,0 +1,70 @@
+test_run = require('test_run').new()
+netbox = require('net.box')
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+
+test_run:cmd('setopt delimiter ""');
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:wait_fullmesh(SERVERS)
+
+nrs = {true, true, true}
+old_leader_nr = get_leader(nrs)
+old_leader = 'election_replica'..old_leader_nr
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+c = netbox.connect(leader_port)
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+_ = c:eval('box.space.test:create_index("pk")')
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+test_run:cmd('setopt delimiter ""');
+-- We're connected to some leader.
+c.space.test:select{}
+
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
index 36ea1f077..887d8a2a0 100644
--- a/test/replication/election_replica.lua
+++ b/test/replication/election_replica.lua
@@ -2,9 +2,10 @@
 
 local INSTANCE_ID = string.match(arg[0], "%d")
 local SOCKET_DIR = require('fio').cwd()
+local SYNCHRO_QUORUM = arg[1] and tonumber(arg[1]) or 3
 
 local function instance_uri(instance_id)
-    return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock';
+    return SOCKET_DIR..'/election_replica'..instance_id..'.sock';
 end
 
 require('console').listen(os.getenv('ADMIN'))
@@ -19,8 +20,11 @@ box.cfg({
     replication_timeout = 0.1,
     election_is_enabled = true,
     election_is_candidate = true,
-    election_timeout = 0.1,
-    replication_synchro_quorum = 3,
+    -- Should be at least as big as replication_disconnect_timeout, which is
+    -- 4 * replication_timeout.
+    election_timeout = 0.4,
+    replication_synchro_quorum = SYNCHRO_QUORUM,
+    replication_synchro_timeout = 0.1,
     -- To reveal more election logs.
     log_level = 6,
 })
-- 
2.24.3 (Apple Git-128)

             reply	other threads:[~2020-10-02 10:33 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-02 10:33 Serge Petrenko [this message]
2020-10-04 13:54 ` Vladislav Shpilevoy
2020-10-05  8:52   ` Serge Petrenko
2020-10-05 21:40     ` Vladislav Shpilevoy
2020-10-06  7:30       ` Serge Petrenko
2020-10-06 10:04 ` Kirill Yukhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201002103312.23042-1-sergepetrenko@tarantool.org \
    --to=sergepetrenko@tarantool.org \
    --cc=tarantool-patches@dev.tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.