[Tarantool-patches] [PATCH 3/4] replication: add test with random leaders promotion and demotion

sergeyb at tarantool.org sergeyb at tarantool.org
Tue Nov 17 19:13:53 MSK 2020


From: Sergey Bronnikov <sergeyb at tarantool.org>

Part of #5055
Part of #5144
---
 test/replication/qsync.lua                    |  31 ++++
 test/replication/qsync1.lua                   |   1 +
 test/replication/qsync2.lua                   |   1 +
 test/replication/qsync3.lua                   |   1 +
 test/replication/qsync4.lua                   |   1 +
 test/replication/qsync5.lua                   |   1 +
 test/replication/qsync_random_leader.result   | 148 ++++++++++++++++++
 test/replication/qsync_random_leader.test.lua |  76 +++++++++
 8 files changed, 260 insertions(+)
 create mode 100644 test/replication/qsync.lua
 create mode 120000 test/replication/qsync1.lua
 create mode 120000 test/replication/qsync2.lua
 create mode 120000 test/replication/qsync3.lua
 create mode 120000 test/replication/qsync4.lua
 create mode 120000 test/replication/qsync5.lua
 create mode 100644 test/replication/qsync_random_leader.result
 create mode 100644 test/replication/qsync_random_leader.test.lua

diff --git a/test/replication/qsync.lua b/test/replication/qsync.lua
new file mode 100644
index 000000000..9bbc87239
--- /dev/null
+++ b/test/replication/qsync.lua
@@ -0,0 +1,31 @@
+#!/usr/bin/env tarantool
+
+-- get instance number from filename (qsync1.lua => 1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+
+local function instance_uri(instance_id)
+    return SOCKET_DIR..'/qsync'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+        instance_uri(3);
+        instance_uri(4);
+        instance_uri(5);
+    };
+    replication_synchro_timeout = 1000;
+    replication_synchro_quorum = 5;
+    read_only = false;
+})
+
+box.once("bootstrap", function()
+    box.schema.user.grant("guest", 'replication')
+end)
diff --git a/test/replication/qsync1.lua b/test/replication/qsync1.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync1.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync2.lua b/test/replication/qsync2.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync2.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync3.lua b/test/replication/qsync3.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync3.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync4.lua b/test/replication/qsync4.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync4.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync5.lua b/test/replication/qsync5.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync5.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync_random_leader.result b/test/replication/qsync_random_leader.result
new file mode 100644
index 000000000..2b2df99db
--- /dev/null
+++ b/test/replication/qsync_random_leader.result
@@ -0,0 +1,148 @@
+-- test-run result file version 2
+os = require('os')
+ | ---
+ | ...
+env = require('test_run')
+ | ---
+ | ...
+math = require('math')
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+
+NUM_INSTANCES = 5
+ | ---
+ | ...
+SERVERS = {}
+ | ---
+ | ...
+for i=1,NUM_INSTANCES do                                                       \
+    SERVERS[i] = 'qsync' .. i                                                  \
+end;
+ | ---
+ | ...
+SERVERS -- print instance names
+ | ---
+ | - - qsync1
+ |   - qsync2
+ |   - qsync3
+ |   - qsync4
+ |   - qsync5
+ | ...
+
+math.randomseed(os.time())
+ | ---
+ | ...
+random = function(excluded_num, total)                                         \
+    local r = math.random(1, total)                                            \
+    if (r == excluded_num) then                                                \
+        return random(excluded_num, total)                                     \
+    end                                                                        \
+    return r                                                                   \
+end
+ | ---
+ | ...
+
+-- Set a 'broken' (unreachable) quorum on the current leader.
+-- Write a value on the current leader.
+-- Pick a random replica in the cluster.
+-- Set a 'good' quorum on it and promote it to leader.
+-- Make sure the value reached a random replica and the old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+test_run:switch('qsync1')
+ | ---
+ | - true
+ | ...
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+ | ---
+ | ...
+_ = box.space.sync:create_index('primary')
+ | ---
+ | ...
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+ | ---
+ | ...
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+current_leader_id = 1
+ | ---
+ | ...
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+ | ---
+ | - []
+ | ...
+
+SOCKET_DIR = require('fio').cwd()
+ | ---
+ | ...
+
+-- Testcase body.
+for i=1,30 do                                                                  \
+    test_run:eval(SERVERS[current_leader_id],                                  \
+        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock')   \
+    fiber.create(function() c.space.sync:insert{i} end)                        \
+    new_leader_id = random(current_leader_id, #SERVERS)                        \
+    test_run:eval(SERVERS[new_leader_id],                                      \
+        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+    test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()")     \
+    c:close()                                                                  \
+    replica = random(new_leader_id, #SERVERS)                                  \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[replica],       \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \
+    new_leader_id = random(current_leader_id, #SERVERS)                        \
+    current_leader_id = new_leader_id                                          \
+end
+ | ---
+ | ...
+
+test_run:wait_cond(function() return test_run:eval('qsync1',                   \
+                   ("box.space.sync:count()")) == 30 end)
+ | ---
+ | - false
+ | ...
+
+-- Teardown.
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+ | ---
+ | - []
+ | ...
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
+box.cfg{                                                                       \
+    replication_synchro_quorum = orig_synchro_quorum,                          \
+    replication_synchro_timeout = orig_synchro_timeout,                        \
+}
+ | ---
+ | ...
diff --git a/test/replication/qsync_random_leader.test.lua b/test/replication/qsync_random_leader.test.lua
new file mode 100644
index 000000000..d84366916
--- /dev/null
+++ b/test/replication/qsync_random_leader.test.lua
@@ -0,0 +1,76 @@
+os = require('os')
+env = require('test_run')
+math = require('math')
+fiber = require('fiber')
+test_run = env.new()
+netbox = require('net.box')
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+
+NUM_INSTANCES = 5
+SERVERS = {}
+for i=1,NUM_INSTANCES do                                                       \
+    SERVERS[i] = 'qsync' .. i                                                  \
+end;
+SERVERS -- print instance names
+
+math.randomseed(os.time())
+random = function(excluded_num, total)                                         \
+    local r = math.random(1, total)                                            \
+    if (r == excluded_num) then                                                \
+        return random(excluded_num, total)                                     \
+    end                                                                        \
+    return r                                                                   \
+end
+
+-- Set a 'broken' (unreachable) quorum on the current leader.
+-- Write a value on the current leader.
+-- Pick a random replica in the cluster.
+-- Set a 'good' quorum on it and promote it to leader.
+-- Make sure the value reached a random replica and the old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+test_run:switch('qsync1')
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+_ = box.space.sync:create_index('primary')
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+test_run:switch('default')
+current_leader_id = 1
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+
+SOCKET_DIR = require('fio').cwd()
+
+-- Testcase body.
+for i=1,30 do                                                                  \
+    test_run:eval(SERVERS[current_leader_id],                                  \
+        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock')   \
+    fiber.create(function() c.space.sync:insert{i} end)                        \
+    new_leader_id = random(current_leader_id, #SERVERS)                        \
+    test_run:eval(SERVERS[new_leader_id],                                      \
+        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+    test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()")     \
+    c:close()                                                                  \
+    replica = random(new_leader_id, #SERVERS)                                  \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[replica],       \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \
+    new_leader_id = random(current_leader_id, #SERVERS)                        \
+    current_leader_id = new_leader_id                                          \
+end
+
+test_run:wait_cond(function() return test_run:eval('qsync1',                   \
+                   ("box.space.sync:count()")) == 30 end)
+
+-- Teardown.
+test_run:switch('default')
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+test_run:drop_cluster(SERVERS)
+box.cfg{                                                                       \
+    replication_synchro_quorum = orig_synchro_quorum,                          \
+    replication_synchro_timeout = orig_synchro_timeout,                        \
+}
-- 
2.25.1



More information about the Tarantool-patches mailing list