[Tarantool-patches] [PATCH 4/3] test: add gh-6036-term-order test

Cyrill Gorcunov gorcunov at gmail.com
Mon Sep 20 18:22:48 MSK 2021


In-scope-of #6036

Signed-off-by: Cyrill Gorcunov <gorcunov at gmail.com>
---
 test/replication/gh-6036-order-master.lua    |   1 +
 test/replication/gh-6036-order-node.lua      |  60 +++++
 test/replication/gh-6036-order-replica1.lua  |   1 +
 test/replication/gh-6036-order-replica2.lua  |   1 +
 test/replication/gh-6036-term-order.result   | 222 +++++++++++++++++++
 test/replication/gh-6036-term-order.test.lua |  89 ++++++++
 test/replication/suite.cfg                   |   1 +
 7 files changed, 375 insertions(+)
 create mode 120000 test/replication/gh-6036-order-master.lua
 create mode 100644 test/replication/gh-6036-order-node.lua
 create mode 120000 test/replication/gh-6036-order-replica1.lua
 create mode 120000 test/replication/gh-6036-order-replica2.lua
 create mode 100644 test/replication/gh-6036-term-order.result
 create mode 100644 test/replication/gh-6036-term-order.test.lua

diff --git a/test/replication/gh-6036-order-master.lua b/test/replication/gh-6036-order-master.lua
new file mode 120000
index 000000000..82a6073a1
--- /dev/null
+++ b/test/replication/gh-6036-order-master.lua
@@ -0,0 +1 @@
+gh-6036-order-node.lua
\ No newline at end of file
diff --git a/test/replication/gh-6036-order-node.lua b/test/replication/gh-6036-order-node.lua
new file mode 100644
index 000000000..b22a7cb4c
--- /dev/null
+++ b/test/replication/gh-6036-order-node.lua
@@ -0,0 +1,60 @@
+local INSTANCE_ID = string.match(arg[0], "gh%-6036%-order%-(.+)%.lua") -- node name
+-- Shared instance script: the master/replica1/replica2 scripts are symlinks to it.
+local function unix_socket(name) -- returns "unix/:./<name>.sock"
+    return "unix/:./" .. name .. '.sock';
+end
+
+require('console').listen(os.getenv('ADMIN')) -- console address comes from $ADMIN
+
+if INSTANCE_ID == "master" then -- every branch lists the same three peers
+    box.cfg({
+        listen                      = unix_socket(INSTANCE_ID),
+        replication                 = {
+            unix_socket(INSTANCE_ID),
+            unix_socket("replica1"),
+            unix_socket("replica2"),
+        },
+        replication_connect_quorum  = 1,
+        replication_synchro_quorum  = 1,
+        replication_synchro_timeout = 10000,
+        replication_sync_timeout    = 5,
+        read_only                   = false,
+        election_mode               = "off",
+    })
+elseif INSTANCE_ID == "replica1" then -- same settings as master, own listen socket
+    box.cfg({
+        listen                      = unix_socket(INSTANCE_ID),
+        replication                 = {
+            unix_socket("master"),
+            unix_socket(INSTANCE_ID),
+            unix_socket("replica2"),
+        },
+        replication_connect_quorum  = 1,
+        replication_synchro_quorum  = 1,
+        replication_synchro_timeout = 10000,
+        replication_sync_timeout    = 5,
+        read_only                   = false,
+        election_mode               = "off",
+    })
+else -- any other name must be replica2, the only read-only node
+    assert(INSTANCE_ID == "replica2")
+    box.cfg({
+        listen                      = unix_socket(INSTANCE_ID),
+        replication                 = {
+            unix_socket("master"),
+            unix_socket("replica1"),
+            unix_socket(INSTANCE_ID),
+        },
+        replication_connect_quorum  = 1,
+        replication_synchro_quorum  = 1,
+        replication_synchro_timeout = 10000,
+        replication_sync_timeout    = 5,
+        read_only                   = true, -- differs from the other two nodes
+        election_mode               = "off",
+    })
+end
+
+-- NOTE(review): box.ctl.wait_rw() is commented out here; confirm it is not needed.
+box.once("bootstrap", function()
+    box.schema.user.grant('guest', 'super') -- grant 'super' to the guest user
+end)
diff --git a/test/replication/gh-6036-order-replica1.lua b/test/replication/gh-6036-order-replica1.lua
new file mode 120000
index 000000000..82a6073a1
--- /dev/null
+++ b/test/replication/gh-6036-order-replica1.lua
@@ -0,0 +1 @@
+gh-6036-order-node.lua
\ No newline at end of file
diff --git a/test/replication/gh-6036-order-replica2.lua b/test/replication/gh-6036-order-replica2.lua
new file mode 120000
index 000000000..82a6073a1
--- /dev/null
+++ b/test/replication/gh-6036-order-replica2.lua
@@ -0,0 +1 @@
+gh-6036-order-node.lua
\ No newline at end of file
diff --git a/test/replication/gh-6036-term-order.result b/test/replication/gh-6036-term-order.result
new file mode 100644
index 000000000..3a5f8bfb2
--- /dev/null
+++ b/test/replication/gh-6036-term-order.result
@@ -0,0 +1,222 @@
+-- test-run result file version 2
+--
+-- gh-6036: verify that terms are locked while a journal write is in
+-- progress. Otherwise parallel appliers may ignore the fact that the
+-- term has already been updated but not yet written, which leads to
+-- data inconsistency.
+--
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+test_run:cmd('create server master with script="replication/gh-6036-order-master.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('create server replica1 with script="replication/gh-6036-order-replica1.lua"')
+ | ---
+ | - true
+ | ...
+test_run:cmd('create server replica2 with script="replication/gh-6036-order-replica2.lua"')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('start server master with wait=False')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica1 with wait=False')
+ | ---
+ | - true
+ | ...
+test_run:cmd('start server replica2 with wait=False')
+ | ---
+ | - true
+ | ...
+
+test_run:wait_fullmesh({"master", "replica1", "replica2"})
+ | ---
+ | ...
+
+test_run:switch("master")
+ | ---
+ | - true
+ | ...
+box.ctl.demote()
+ | ---
+ | ...
+assert(box.info.election.term == 1)
+ | ---
+ | - true
+ | ...
+
+test_run:switch("replica1")
+ | ---
+ | - true
+ | ...
+box.ctl.demote()
+ | ---
+ | ...
+assert(box.info.election.term == 1)
+ | ---
+ | - true
+ | ...
+
+test_run:switch("replica2")
+ | ---
+ | - true
+ | ...
+box.ctl.demote()
+ | ---
+ | ...
+assert(box.info.election.term == 1)
+ | ---
+ | - true
+ | ...
+
+--
+-- Drop the connection between master and replica1 in both directions.
+test_run:switch("master")
+ | ---
+ | - true
+ | ...
+box.cfg({                                   \
+    replication = {                         \
+        "unix/:./master.sock",              \
+        "unix/:./replica2.sock",            \
+    },                                      \
+})
+ | ---
+ | ...
+test_run:switch("replica1")
+ | ---
+ | - true
+ | ...
+box.cfg({                                   \
+    replication = {                         \
+        "unix/:./replica1.sock",            \
+        "unix/:./replica2.sock",            \
+    },                                      \
+})
+ | ---
+ | ...
+
+--
+-- Delay WAL writes on replica2 (ERRINJ_WAL_DELAY error injection).
+test_run:switch("replica2")
+ | ---
+ | - true
+ | ...
+assert(box.info.election.term == 1)
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_DELAY', true)
+ | ---
+ | - ok
+ | ...
+
+--
+-- Ping-pong promote/demote between the master and
+-- replica1 nodes; the term updates get queued on
+-- replica2 because its WAL writes are delayed.
+test_run:switch("master")
+ | ---
+ | - true
+ | ...
+box.ctl.promote()
+ | ---
+ | ...
+assert(box.info.election.term == 2)
+ | ---
+ | - true
+ | ...
+box.ctl.demote()
+ | ---
+ | ...
+assert(box.info.election.term == 3)
+ | ---
+ | - true
+ | ...
+
+test_run:switch("replica1")
+ | ---
+ | - true
+ | ...
+box.ctl.promote()
+ | ---
+ | ...
+assert(box.info.election.term == 2)
+ | ---
+ | - true
+ | ...
+box.ctl.demote()
+ | ---
+ | ...
+assert(box.info.election.term == 3)
+ | ---
+ | - true
+ | ...
+
+test_run:switch("master")
+ | ---
+ | - true
+ | ...
+box.ctl.promote()
+ | ---
+ | ...
+assert(box.info.election.term == 4)
+ | ---
+ | - true
+ | ...
+
+--
+-- Finally re-enable WAL writes on replica2 so the
+-- queued term updates get sequenced.
+test_run:switch("replica2")
+ | ---
+ | - true
+ | ...
+assert(box.info.election.term == 2)
+ | ---
+ | - true
+ | ...
+box.error.injection.set('ERRINJ_WAL_DELAY', false)
+ | ---
+ | - ok
+ | ...
+test_run:wait_cond(function() return box.info.election.term == 4 end, 100)
+ | ---
+ | - true
+ | ...
+
+test_run:switch("default")
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server master')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server replica1')
+ | ---
+ | - true
+ | ...
+test_run:cmd('stop server replica2')
+ | ---
+ | - true
+ | ...
+
+test_run:cmd('delete server master')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica1')
+ | ---
+ | - true
+ | ...
+test_run:cmd('delete server replica2')
+ | ---
+ | - true
+ | ...
diff --git a/test/replication/gh-6036-term-order.test.lua b/test/replication/gh-6036-term-order.test.lua
new file mode 100644
index 000000000..79dd8efe4
--- /dev/null
+++ b/test/replication/gh-6036-term-order.test.lua
@@ -0,0 +1,89 @@
+--
+-- gh-6036: verify that terms are locked while a journal write is in
+-- progress. Otherwise parallel appliers may ignore the fact that the
+-- term has already been updated but not yet written, which leads to
+-- data inconsistency.
+--
+test_run = require('test_run').new()
+
+test_run:cmd('create server master with script="replication/gh-6036-order-master.lua"')
+test_run:cmd('create server replica1 with script="replication/gh-6036-order-replica1.lua"')
+test_run:cmd('create server replica2 with script="replication/gh-6036-order-replica2.lua"')
+
+test_run:cmd('start server master with wait=False')
+test_run:cmd('start server replica1 with wait=False')
+test_run:cmd('start server replica2 with wait=False')
+
+test_run:wait_fullmesh({"master", "replica1", "replica2"})
+
+test_run:switch("master")
+box.ctl.demote()
+assert(box.info.election.term == 1)
+
+test_run:switch("replica1")
+box.ctl.demote()
+assert(box.info.election.term == 1)
+
+test_run:switch("replica2")
+box.ctl.demote()
+assert(box.info.election.term == 1)
+
+--
+-- Drop the connection between master and replica1 in both directions.
+test_run:switch("master")
+box.cfg({                                   \
+    replication = {                         \
+        "unix/:./master.sock",              \
+        "unix/:./replica2.sock",            \
+    },                                      \
+})
+test_run:switch("replica1")
+box.cfg({                                   \
+    replication = {                         \
+        "unix/:./replica1.sock",            \
+        "unix/:./replica2.sock",            \
+    },                                      \
+})
+
+--
+-- Delay WAL writes on replica2 (ERRINJ_WAL_DELAY error injection).
+test_run:switch("replica2")
+assert(box.info.election.term == 1)
+box.error.injection.set('ERRINJ_WAL_DELAY', true)
+
+--
+-- Ping-pong promote/demote between the master and
+-- replica1 nodes; the term updates get queued on
+-- replica2 because its WAL writes are delayed.
+test_run:switch("master")
+box.ctl.promote()
+assert(box.info.election.term == 2)
+box.ctl.demote()
+assert(box.info.election.term == 3)
+
+test_run:switch("replica1")
+box.ctl.promote()
+assert(box.info.election.term == 2)
+box.ctl.demote()
+assert(box.info.election.term == 3)
+
+test_run:switch("master")
+box.ctl.promote()
+assert(box.info.election.term == 4)
+
+--
+-- Finally re-enable WAL writes on replica2 so the
+-- queued term updates get sequenced.
+test_run:switch("replica2")
+assert(box.info.election.term == 2)
+box.error.injection.set('ERRINJ_WAL_DELAY', false)
+test_run:wait_cond(function() return box.info.election.term == 4 end, 100)
+
+test_run:switch("default")
+test_run:cmd('stop server master')
+test_run:cmd('stop server replica1')
+test_run:cmd('stop server replica2')
+
+test_run:cmd('delete server master')
+test_run:cmd('delete server replica1')
+test_run:cmd('delete server replica2')
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 3eee0803c..ac2bedfd9 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -59,6 +59,7 @@
     "gh-6094-rs-uuid-mismatch.test.lua": {},
     "gh-6127-election-join-new.test.lua": {},
     "gh-6035-applier-filter.test.lua": {},
+    "gh-6036-term-order.test.lua": {},
     "election-candidate-promote.test.lua": {},
     "*": {
         "memtx": {"engine": "memtx"},
-- 
2.31.1



More information about the Tarantool-patches mailing list