[Tarantool-patches] [PATCH v31 3/3] test: add gh-6036-qsync-order test
Serge Petrenko
sergepetrenko at tarantool.org
Thu Mar 3 13:08:59 MSK 2022
02.03.2022 23:27, Cyrill Gorcunov пишет:
> To test that promotion requests are handled only when the corresponding
> write to WAL completes, because we update in-memory data before the
> write finishes.
>
> Part-of #6036
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
Thanks for working on this!
Please consider the following cosmetic changes:
1. There's build_and_add_server as an alias for build_server + add_server
2. Use luatest.assert instead of plain assert everywhere
3. Use exec instead of eval everywhere
===================================
diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua
b/test/replication-luatest/gh_6036_qsync_order_test.lua
index c23c7a3a1..964101571 100644
--- a/test/replication-luatest/gh_6036_qsync_order_test.lua
+++ b/test/replication-luatest/gh_6036_qsync_order_test.lua
@@ -22,11 +22,9 @@ g.before_all(function(cg)
log_level = 6,
}
- cg.r1 = cg.cluster:build_server({ alias = 'r1', box_cfg = cg.box_cfg })
- cg.r2 = cg.cluster:build_server({ alias = 'r2', box_cfg = cg.box_cfg })
+ cg.r1 = cg.cluster:build_and_add_server({ alias = 'r1', box_cfg =
cg.box_cfg })
+ cg.r2 = cg.cluster:build_and_add_server({ alias = 'r2', box_cfg =
cg.box_cfg })
- cg.cluster:add_server(cg.r1)
- cg.cluster:add_server(cg.r2)
cg.cluster:start()
end)
@@ -43,8 +41,7 @@ end
-- The test requires 3rd replica to graft in.
g.before_test("test_qsync_order", function(cg)
cg.box_cfg.replication[3] = server.build_instance_uri("r3")
- cg.r3 = cg.cluster:build_server({ alias = 'r3', box_cfg = cg.box_cfg })
- cg.cluster:add_server(cg.r3)
+ cg.r3 = cg.cluster:build_and_add_server({ alias = 'r3', box_cfg =
cg.box_cfg })
cg.r3:start()
cg.r1:exec(update_replication, cg.box_cfg.replication)
cg.r2:exec(update_replication, cg.box_cfg.replication)
@@ -69,9 +66,9 @@ g.test_qsync_order = function(cg)
cg.r2:wait_vclock(vclock)
cg.r3:wait_vclock(vclock)
- t.assert_equals(cg.r1:eval("return box.space.test:select()"), {{1}})
- t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1}})
- t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1}})
+ t.assert_equals(cg.r1:exec(function() return
box.space.test:select() end), {{1}})
+ t.assert_equals(cg.r2:exec(function() return
box.space.test:select() end), {{1}})
+ t.assert_equals(cg.r3:exec(function() return
box.space.test:select() end), {{1}})
--
-- Drop connection between r1 and r2.
@@ -113,7 +110,7 @@ g.test_qsync_order = function(cg)
box.ctl.wait_rw()
end)
t.helpers.retrying({}, function()
- assert(cg.r3:exec(function()
+ t.assert(cg.r3:exec(function()
return box.info.synchro.queue.busy == true
end))
end)
@@ -136,12 +133,12 @@ g.test_qsync_order = function(cg)
box.error.injection.set('ERRINJ_WAL_DELAY', false)
end)
t.helpers.retrying({}, function()
- assert(cg.r3:exec(function()
+ t.assert(cg.r3:exec(function()
return box.space.test:get{2} ~= nil
end))
end)
- t.assert_equals(cg.r3:eval("return box.space.test:select()"),
{{1},{2}})
+ t.assert_equals(cg.r3:exec(function() return
box.space.test:select() end), {{1},{2}})
end
--
@@ -189,10 +186,10 @@ g.test_promote_order = function(cg)
box.space.test:insert{4}
end)
cg.r2:exec(function()
- assert(box.info.synchro.queue.busy == true)
+ require('luatest').assert(box.info.synchro.queue.busy == true)
box.error.injection.set('ERRINJ_WAL_DELAY', false)
box.ctl.wait_rw()
end)
- t.assert_equals(cg.r2:eval("return box.space.test:select()"),
{{1},{2}})
+ t.assert_equals(cg.r2:exec(function() return
box.space.test:select() end), {{1},{2}})
end
===================================
The patchset LGTM once you apply these (or decline them).
Please proceed to Vlad's review.
> ---
> .../gh_6036_qsync_order_test.lua | 198 ++++++++++++++++++
> test/replication-luatest/suite.ini | 1 +
> 2 files changed, 199 insertions(+)
> create mode 100644 test/replication-luatest/gh_6036_qsync_order_test.lua
>
> diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua b/test/replication-luatest/gh_6036_qsync_order_test.lua
> new file mode 100644
> index 000000000..c23c7a3a1
> --- /dev/null
> +++ b/test/replication-luatest/gh_6036_qsync_order_test.lua
> @@ -0,0 +1,198 @@
> +local t = require('luatest')
> +local cluster = require('test.luatest_helpers.cluster')
> +local server = require('test.luatest_helpers.server')
> +local fiber = require('fiber')
> +
> +local g = t.group('gh-6036')
> +
> +g.before_all(function(cg)
> + cg.cluster = cluster:new({})
> +
> + cg.box_cfg = {
> + replication = {
> + server.build_instance_uri('r1'),
> + server.build_instance_uri('r2'),
> + },
> + replication_timeout = 0.1,
> + replication_connect_quorum = 1,
> + election_mode = 'manual',
> + election_timeout = 0.1,
> + replication_synchro_quorum = 1,
> + replication_synchro_timeout = 0.1,
> + log_level = 6,
> + }
> +
> + cg.r1 = cg.cluster:build_server({ alias = 'r1', box_cfg = cg.box_cfg })
> + cg.r2 = cg.cluster:build_server({ alias = 'r2', box_cfg = cg.box_cfg })
> +
> + cg.cluster:add_server(cg.r1)
> + cg.cluster:add_server(cg.r2)
> + cg.cluster:start()
> +end)
> +
> +g.after_all(function(cg)
> + cg.cluster:drop()
> + cg.cluster.servers = nil
> +end)
> +
> +local function update_replication(...)
> + return (box.cfg{ replication = { ... } })
> +end
> +
> +--
> +-- The test requires 3rd replica to graft in.
> +g.before_test("test_qsync_order", function(cg)
> + cg.box_cfg.replication[3] = server.build_instance_uri("r3")
> + cg.r3 = cg.cluster:build_server({ alias = 'r3', box_cfg = cg.box_cfg })
> + cg.cluster:add_server(cg.r3)
> + cg.r3:start()
> + cg.r1:exec(update_replication, cg.box_cfg.replication)
> + cg.r2:exec(update_replication, cg.box_cfg.replication)
> +end)
> +
> +g.test_qsync_order = function(cg)
> + cg.cluster:wait_fullmesh()
> +
> + --
> + -- Create a synchro space on the r1 node and make
> + -- sure the write processed just fine.
> + cg.r1:exec(function()
> + box.ctl.promote()
> + box.ctl.wait_rw()
> + local s = box.schema.create_space('test', {is_sync = true})
> + s:create_index('pk')
> + s:insert{1}
> + end)
> +
> + local vclock = cg.r1:get_vclock()
> + vclock[0] = nil
> + cg.r2:wait_vclock(vclock)
> + cg.r3:wait_vclock(vclock)
> +
> + t.assert_equals(cg.r1:eval("return box.space.test:select()"), {{1}})
> + t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1}})
> + t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1}})
> +
> + --
> + -- Drop connection between r1 and r2.
> + cg.r1:exec(update_replication, {
> + server.build_instance_uri("r1"),
> + server.build_instance_uri("r3"),
> + })
> +
> + --
> + -- Drop connection between r2 and r1.
> + cg.r2:exec(update_replication, {
> + server.build_instance_uri("r2"),
> + server.build_instance_uri("r3"),
> + })
> +
> + --
> + -- Here we have the following scheme
> + --
> + -- r3 (WAL delay)
> + -- / \
> + -- r1 r2
> + --
> +
> + --
> + -- Initiate a disk delay in a slightly tricky way: the next write
> + -- will fall into an endless sleep.
> + cg.r3:exec(function()
> + box.error.injection.set('ERRINJ_WAL_DELAY', true)
> + end)
> +
> + --
> + -- Make r2 become a leader and start writing data. The PROMOTE
> + -- request gets queued on r3 and is not yet processed; at the same
> + -- time the INSERT won't complete either, since it waits for the
> + -- PROMOTE completion first. Note that we check r3 as well just to
> + -- be sure the PROMOTE has reached it, via the queue state test.
> + cg.r2:exec(function()
> + box.ctl.promote()
> + box.ctl.wait_rw()
> + end)
> + t.helpers.retrying({}, function()
> + assert(cg.r3:exec(function()
> + return box.info.synchro.queue.busy == true
> + end))
> + end)
> + cg.r2:exec(function()
> + box.space.test:insert{2}
> + end)
> +
> + --
> + -- The r1 node has no clue that there is a new leader and continues
> + -- writing data with an obsolete term. Since r3 is delayed now,
> + -- the INSERT won't proceed yet but gets queued.
> + cg.r1:exec(function()
> + box.space.test:insert{3}
> + end)
> +
> + --
> + -- Finally enable r3 back. Make sure the data from the new r2 leader
> + -- gets written, while the old leader's data is ignored.
> + cg.r3:exec(function()
> + box.error.injection.set('ERRINJ_WAL_DELAY', false)
> + end)
> + t.helpers.retrying({}, function()
> + assert(cg.r3:exec(function()
> + return box.space.test:get{2} ~= nil
> + end))
> + end)
> +
> + t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1},{2}})
> +end
> +
> +--
> +-- Drop the r3 replica, since it is no longer needed for this test.
> +g.after_test("test_qsync_order", function(cg)
> + cg.box_cfg.replication[3] = nil
> + cg.r1:exec(update_replication, cg.box_cfg.replication)
> + cg.r2:exec(update_replication, cg.box_cfg.replication)
> + cg.r3:stop()
> + cg.r3:cleanup()
> + cg.r3 = nil
> +end)
> +
> +g.test_promote_order = function(cg)
> + --
> + -- Make sure that while we're processing PROMOTE no other records
> + -- get sneaked in via applier code from other replicas. For this
> + -- sake initiate voting and stop inside wal thread just before
> + -- PROMOTE get written. Another replica sends us new record and
> + -- it should be dropped.
> + cg.r1:exec(function()
> + box.ctl.promote()
> + box.ctl.wait_rw()
> + end)
> + local vclock = cg.r1:get_vclock()
> + vclock[0] = nil
> + cg.r2:wait_vclock(vclock)
> +
> + --
> + -- Drop connection between r1 and the rest of the cluster.
> + -- Otherwise r1 might become Raft follower before attempting
> + -- insert{4}.
> + cg.r1:exec(function() box.cfg{replication=""} end)
> + cg.r2:exec(function()
> + box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 2)
> + require('fiber').create(function() box.ctl.promote() end)
> + end)
> + t.helpers.retrying({}, function()
> + t.assert(cg.r2:exec(function()
> + return box.info.synchro.queue.busy
> + end))
> + end)
> + t.assert(cg.r1:exec(function() return box.info.ro == false end))
> + cg.r1:exec(function()
> + box.space.test:insert{4}
> + end)
> + cg.r2:exec(function()
> + assert(box.info.synchro.queue.busy == true)
> + box.error.injection.set('ERRINJ_WAL_DELAY', false)
> + box.ctl.wait_rw()
> + end)
> +
> + t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1},{2}})
> +end
> diff --git a/test/replication-luatest/suite.ini b/test/replication-luatest/suite.ini
> index 374f1b87a..07ec93a52 100644
> --- a/test/replication-luatest/suite.ini
> +++ b/test/replication-luatest/suite.ini
> @@ -2,3 +2,4 @@
> core = luatest
> description = replication luatests
> is_parallel = True
> +release_disabled = gh_6036_qsync_order_test.lua
More information about the Tarantool-patches
mailing list