[Tarantool-patches] [PATCH v31 3/3] test: add gh-6036-qsync-order test

Serge Petrenko sergepetrenko at tarantool.org
Thu Mar 3 13:08:59 MSK 2022


02.03.2022 23:27, Cyrill Gorcunov пишет:
> To test that promotion requests are handled only when the appropriate
> write to WAL completes, because we update memory data before the
> write finishes.
>
> Part-of #6036
>
> Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>


Thanks for working on this!

Please consider the following cosmetic changes:

1. There's build_and_add_server as an alias for build_server + add_server

2. Use luatest.assert instead of plain assert everywhere

3. Use exec instead of eval everywhere

===================================


diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua b/test/replication-luatest/gh_6036_qsync_order_test.lua
index c23c7a3a1..964101571 100644
--- a/test/replication-luatest/gh_6036_qsync_order_test.lua
+++ b/test/replication-luatest/gh_6036_qsync_order_test.lua
@@ -22,11 +22,9 @@ g.before_all(function(cg)
          log_level                   = 6,
      }

-    cg.r1 = cg.cluster:build_server({ alias = 'r1', box_cfg = cg.box_cfg })
-    cg.r2 = cg.cluster:build_server({ alias = 'r2', box_cfg = cg.box_cfg })
+    cg.r1 = cg.cluster:build_and_add_server({ alias = 'r1', box_cfg = cg.box_cfg })
+    cg.r2 = cg.cluster:build_and_add_server({ alias = 'r2', box_cfg = cg.box_cfg })

-    cg.cluster:add_server(cg.r1)
-    cg.cluster:add_server(cg.r2)
      cg.cluster:start()
  end)

@@ -43,8 +41,7 @@ end
  -- The test requires 3rd replica to graft in.
  g.before_test("test_qsync_order", function(cg)
      cg.box_cfg.replication[3] = server.build_instance_uri("r3")
-    cg.r3 = cg.cluster:build_server({ alias = 'r3', box_cfg = cg.box_cfg })
-    cg.cluster:add_server(cg.r3)
+    cg.r3 = cg.cluster:build_and_add_server({ alias = 'r3', box_cfg = cg.box_cfg })
      cg.r3:start()
      cg.r1:exec(update_replication, cg.box_cfg.replication)
      cg.r2:exec(update_replication, cg.box_cfg.replication)
@@ -69,9 +66,9 @@ g.test_qsync_order = function(cg)
      cg.r2:wait_vclock(vclock)
      cg.r3:wait_vclock(vclock)

-    t.assert_equals(cg.r1:eval("return box.space.test:select()"), {{1}})
-    t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1}})
-    t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1}})
+    t.assert_equals(cg.r1:exec(function() return box.space.test:select() end), {{1}})
+    t.assert_equals(cg.r2:exec(function() return box.space.test:select() end), {{1}})
+    t.assert_equals(cg.r3:exec(function() return box.space.test:select() end), {{1}})

      --
      -- Drop connection between r1 and r2.
@@ -113,7 +110,7 @@ g.test_qsync_order = function(cg)
          box.ctl.wait_rw()
      end)
      t.helpers.retrying({}, function()
-        assert(cg.r3:exec(function()
+        t.assert(cg.r3:exec(function()
              return box.info.synchro.queue.busy == true
          end))
      end)
@@ -136,12 +133,12 @@ g.test_qsync_order = function(cg)
          box.error.injection.set('ERRINJ_WAL_DELAY', false)
      end)
      t.helpers.retrying({}, function()
-        assert(cg.r3:exec(function()
+        t.assert(cg.r3:exec(function()
              return box.space.test:get{2} ~= nil
          end))
      end)

-    t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1},{2}})
+    t.assert_equals(cg.r3:exec(function() return box.space.test:select() end), {{1},{2}})
  end

  --
@@ -189,10 +186,10 @@ g.test_promote_order = function(cg)
          box.space.test:insert{4}
      end)
      cg.r2:exec(function()
-        assert(box.info.synchro.queue.busy == true)
+        require('luatest').assert(box.info.synchro.queue.busy == true)
          box.error.injection.set('ERRINJ_WAL_DELAY', false)
          box.ctl.wait_rw()
      end)

-    t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1},{2}})
+    t.assert_equals(cg.r2:exec(function() return box.space.test:select() end), {{1},{2}})
  end


===================================


The patchset LGTM once you apply these (or decline them).

Please, proceed to Vlad's review.


> ---
>   .../gh_6036_qsync_order_test.lua              | 198 ++++++++++++++++++
>   test/replication-luatest/suite.ini            |   1 +
>   2 files changed, 199 insertions(+)
>   create mode 100644 test/replication-luatest/gh_6036_qsync_order_test.lua
>
> diff --git a/test/replication-luatest/gh_6036_qsync_order_test.lua b/test/replication-luatest/gh_6036_qsync_order_test.lua
> new file mode 100644
> index 000000000..c23c7a3a1
> --- /dev/null
> +++ b/test/replication-luatest/gh_6036_qsync_order_test.lua
> @@ -0,0 +1,198 @@
> +local t = require('luatest')
> +local cluster = require('test.luatest_helpers.cluster')
> +local server = require('test.luatest_helpers.server')
> +local fiber = require('fiber')
> +
> +local g = t.group('gh-6036')
> +
> +g.before_all(function(cg)
> +    cg.cluster = cluster:new({})
> +
> +    cg.box_cfg = {
> +        replication = {
> +            server.build_instance_uri('r1'),
> +            server.build_instance_uri('r2'),
> +        },
> +        replication_timeout         = 0.1,
> +        replication_connect_quorum  = 1,
> +        election_mode               = 'manual',
> +        election_timeout            = 0.1,
> +        replication_synchro_quorum  = 1,
> +        replication_synchro_timeout = 0.1,
> +        log_level                   = 6,
> +    }
> +
> +    cg.r1 = cg.cluster:build_server({ alias = 'r1', box_cfg = cg.box_cfg })
> +    cg.r2 = cg.cluster:build_server({ alias = 'r2', box_cfg = cg.box_cfg })
> +
> +    cg.cluster:add_server(cg.r1)
> +    cg.cluster:add_server(cg.r2)
> +    cg.cluster:start()
> +end)
> +
> +g.after_all(function(cg)
> +    cg.cluster:drop()
> +    cg.cluster.servers = nil
> +end)
> +
> +local function update_replication(...)
> +    return (box.cfg{ replication = { ... } })
> +end
> +
> +--
> +-- The test requires 3rd replica to graft in.
> +g.before_test("test_qsync_order", function(cg)
> +    cg.box_cfg.replication[3] = server.build_instance_uri("r3")
> +    cg.r3 = cg.cluster:build_server({ alias = 'r3', box_cfg = cg.box_cfg })
> +    cg.cluster:add_server(cg.r3)
> +    cg.r3:start()
> +    cg.r1:exec(update_replication, cg.box_cfg.replication)
> +    cg.r2:exec(update_replication, cg.box_cfg.replication)
> +end)
> +
> +g.test_qsync_order = function(cg)
> +    cg.cluster:wait_fullmesh()
> +
> +    --
> +    -- Create a synchro space on the r1 node and make
> +    -- sure the write processed just fine.
> +    cg.r1:exec(function()
> +        box.ctl.promote()
> +        box.ctl.wait_rw()
> +        local s = box.schema.create_space('test', {is_sync = true})
> +        s:create_index('pk')
> +        s:insert{1}
> +    end)
> +
> +    local vclock = cg.r1:get_vclock()
> +    vclock[0] = nil
> +    cg.r2:wait_vclock(vclock)
> +    cg.r3:wait_vclock(vclock)
> +
> +    t.assert_equals(cg.r1:eval("return box.space.test:select()"), {{1}})
> +    t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1}})
> +    t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1}})
> +
> +    --
> +    -- Drop connection between r1 and r2.
> +    cg.r1:exec(update_replication, {
> +            server.build_instance_uri("r1"),
> +            server.build_instance_uri("r3"),
> +        })
> +
> +    --
> +    -- Drop connection between r2 and r1.
> +    cg.r2:exec(update_replication, {
> +        server.build_instance_uri("r2"),
> +        server.build_instance_uri("r3"),
> +    })
> +
> +    --
> +    -- Here we have the following scheme
> +    --
> +    --      r3 (WAL delay)
> +    --      /            \
> +    --    r1              r2
> +    --
> +
> +    --
> +    -- Initiate disk delay in a bit tricky way: the next write will
> +    -- fall into forever sleep.
> +    cg.r3:exec(function()
> +        box.error.injection.set('ERRINJ_WAL_DELAY', true)
> +    end)
> +
> +    --
> +    -- Make r2 the leader and start writing data: the PROMOTE
> +    -- request gets queued on r3 and is not yet processed; at the same
> +    -- time the INSERT won't complete either, waiting for the PROMOTE
> +    -- to complete first. Note that we enter r3 as well, just to be
> +    -- sure the PROMOTE has reached it, via the queue state test.
> +    cg.r2:exec(function()
> +        box.ctl.promote()
> +        box.ctl.wait_rw()
> +    end)
> +    t.helpers.retrying({}, function()
> +        assert(cg.r3:exec(function()
> +            return box.info.synchro.queue.busy == true
> +        end))
> +    end)
> +    cg.r2:exec(function()
> +        box.space.test:insert{2}
> +    end)
> +
> +    --
> +    -- The r1 node has no clue that there is a new leader and continues
> +    -- writing data with an obsolete term. Since r3 is delayed now,
> +    -- the INSERT won't proceed yet but gets queued.
> +    cg.r1:exec(function()
> +        box.space.test:insert{3}
> +    end)
> +
> +    --
> +    -- Finally enable r3 back. Make sure the data from the new r2 leader
> +    -- gets written while the old leader's data is ignored.
> +    cg.r3:exec(function()
> +        box.error.injection.set('ERRINJ_WAL_DELAY', false)
> +    end)
> +    t.helpers.retrying({}, function()
> +        assert(cg.r3:exec(function()
> +            return box.space.test:get{2} ~= nil
> +        end))
> +    end)
> +
> +    t.assert_equals(cg.r3:eval("return box.space.test:select()"), {{1},{2}})
> +end
> +
> +--
> +-- Drop the r3 replica, since it is no longer needed for this test.
> +g.after_test("test_qsync_order", function(cg)
> +    cg.box_cfg.replication[3] = nil
> +    cg.r1:exec(update_replication, cg.box_cfg.replication)
> +    cg.r2:exec(update_replication, cg.box_cfg.replication)
> +    cg.r3:stop()
> +    cg.r3:cleanup()
> +    cg.r3 = nil
> +end)
> +
> +g.test_promote_order = function(cg)
> +    --
> +    -- Make sure that while we're processing PROMOTE no other records
> +    -- get sneaked in via applier code from other replicas. To do so,
> +    -- initiate voting and stop inside the WAL thread just before
> +    -- PROMOTE gets written. Another replica sends us a new record and
> +    -- it should be dropped.
> +    cg.r1:exec(function()
> +        box.ctl.promote()
> +        box.ctl.wait_rw()
> +    end)
> +    local vclock = cg.r1:get_vclock()
> +    vclock[0] = nil
> +    cg.r2:wait_vclock(vclock)
> +
> +    --
> +    -- Drop connection between r1 and the rest of the cluster.
> +    -- Otherwise r1 might become Raft follower before attempting
> +    -- insert{4}.
> +    cg.r1:exec(function() box.cfg{replication=""} end)
> +    cg.r2:exec(function()
> +        box.error.injection.set('ERRINJ_WAL_DELAY_COUNTDOWN', 2)
> +        require('fiber').create(function() box.ctl.promote() end)
> +    end)
> +    t.helpers.retrying({}, function()
> +        t.assert(cg.r2:exec(function()
> +            return box.info.synchro.queue.busy
> +        end))
> +    end)
> +    t.assert(cg.r1:exec(function() return box.info.ro == false end))
> +    cg.r1:exec(function()
> +        box.space.test:insert{4}
> +    end)
> +    cg.r2:exec(function()
> +        assert(box.info.synchro.queue.busy == true)
> +        box.error.injection.set('ERRINJ_WAL_DELAY', false)
> +        box.ctl.wait_rw()
> +    end)
> +
> +    t.assert_equals(cg.r2:eval("return box.space.test:select()"), {{1},{2}})
> +end
> diff --git a/test/replication-luatest/suite.ini b/test/replication-luatest/suite.ini
> index 374f1b87a..07ec93a52 100644
> --- a/test/replication-luatest/suite.ini
> +++ b/test/replication-luatest/suite.ini
> @@ -2,3 +2,4 @@
>   core = luatest
>   description = replication luatests
>   is_parallel = True
> +release_disabled = gh_6036_qsync_order_test.lua


More information about the Tarantool-patches mailing list