[Tarantool-patches] [PATCH 3/4] replication: add test with random leaders promotion and demotion
sergeyb at tarantool.org
sergeyb at tarantool.org
Tue Nov 17 19:13:53 MSK 2020
From: Sergey Bronnikov <sergeyb at tarantool.org>
Add a stress test for synchronous replication with a randomly moving
leader: the current leader is given an unreachable quorum so its insert
hangs, then a randomly picked replica gets a reachable quorum and is
promoted via box.ctl.clear_synchro_queue(); the test then checks that
the pending row becomes visible on a random replica and on the old
leader.

Part of #5055
Part of #5144
---
test/replication/qsync.lua | 31 ++++
test/replication/qsync1.lua | 1 +
test/replication/qsync2.lua | 1 +
test/replication/qsync3.lua | 1 +
test/replication/qsync4.lua | 1 +
test/replication/qsync5.lua | 1 +
test/replication/qsync_random_leader.result | 148 ++++++++++++++++++
test/replication/qsync_random_leader.test.lua | 76 +++++++++
8 files changed, 260 insertions(+)
create mode 100644 test/replication/qsync.lua
create mode 120000 test/replication/qsync1.lua
create mode 120000 test/replication/qsync2.lua
create mode 120000 test/replication/qsync3.lua
create mode 120000 test/replication/qsync4.lua
create mode 120000 test/replication/qsync5.lua
create mode 100644 test/replication/qsync_random_leader.result
create mode 100644 test/replication/qsync_random_leader.test.lua
diff --git a/test/replication/qsync.lua b/test/replication/qsync.lua
new file mode 100644
index 000000000..9bbc87239
--- /dev/null
+++ b/test/replication/qsync.lua
@@ -0,0 +1,31 @@
+#!/usr/bin/env tarantool
+
+-- get instance number from the filename (qsync1.lua => "1")
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+
+local function instance_uri(instance_id)
+ return SOCKET_DIR..'/qsync'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+ listen = instance_uri(INSTANCE_ID);
+ replication = {
+ instance_uri(1);
+ instance_uri(2);
+ instance_uri(3);
+ instance_uri(4);
+ instance_uri(5);
+ };
+ replication_synchro_timeout = 1000;
+ replication_synchro_quorum = 5;
+ read_only = false;
+})
+
+box.once("bootstrap", function()
+ box.schema.user.grant("guest", 'replication')
+end)
diff --git a/test/replication/qsync1.lua b/test/replication/qsync1.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync1.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync2.lua b/test/replication/qsync2.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync2.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync3.lua b/test/replication/qsync3.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync3.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync4.lua b/test/replication/qsync4.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync4.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync5.lua b/test/replication/qsync5.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync5.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync_random_leader.result b/test/replication/qsync_random_leader.result
new file mode 100644
index 000000000..2b2df99db
--- /dev/null
+++ b/test/replication/qsync_random_leader.result
@@ -0,0 +1,148 @@
+-- test-run result file version 2
+os = require('os')
+ | ---
+ | ...
+env = require('test_run')
+ | ---
+ | ...
+math = require('math')
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+
+NUM_INSTANCES = 5
+ | ---
+ | ...
+SERVERS = {}
+ | ---
+ | ...
+for i=1,NUM_INSTANCES do \
+ SERVERS[i] = 'qsync' .. i \
+end;
+ | ---
+ | ...
+SERVERS -- print instance names
+ | ---
+ | - - qsync1
+ | - qsync2
+ | - qsync3
+ | - qsync4
+ | - qsync5
+ | ...
+
+math.randomseed(os.time())
+ | ---
+ | ...
+random = function(excluded_num, total) \
+ local r = math.random(1, total) \
+ if (r == excluded_num) then \
+ return random(excluded_num, total) \
+ end \
+ return r \
+end
+ | ---
+ | ...
+
+-- Set 'broken' quorum on current leader.
+-- Write value on current leader.
+-- Pick a random replica in a cluster.
+-- Set 'good' quorum on it and promote to a leader.
+-- Make sure value is there and on an old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+test_run:switch('qsync1')
+ | ---
+ | - true
+ | ...
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+ | ---
+ | ...
+_ = box.space.sync:create_index('primary')
+ | ---
+ | ...
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+ | ---
+ | ...
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+current_leader_id = 1
+ | ---
+ | ...
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+ | ---
+ | - []
+ | ...
+
+SOCKET_DIR = require('fio').cwd()
+ | ---
+ | ...
+
+-- Testcase body.
+for i=1,30 do \
+ test_run:eval(SERVERS[current_leader_id], \
+ "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+ c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock') \
+ fiber.create(function() c.space.sync:insert{i} end) \
+ new_leader_id = random(current_leader_id, #SERVERS) \
+ test_run:eval(SERVERS[new_leader_id], \
+ "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+ test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()") \
+ c:close() \
+ replica = random(new_leader_id, #SERVERS) \
+ test_run:wait_cond(function() return test_run:eval(SERVERS[replica], \
+ string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+ test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+ string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+ new_leader_id = random(current_leader_id, #SERVERS) \
+ current_leader_id = new_leader_id \
+end
+ | ---
+ | ...
+
+test_run:wait_cond(function() return test_run:eval('qsync1', \
+ ("box.space.sync:count()")) == 30 end)
+ | ---
+ | - false
+ | ...
+
+-- Teardown.
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+ | ---
+ | - []
+ | ...
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
+box.cfg{ \
+ replication_synchro_quorum = orig_synchro_quorum, \
+ replication_synchro_timeout = orig_synchro_timeout, \
+}
+ | ---
+ | ...
diff --git a/test/replication/qsync_random_leader.test.lua b/test/replication/qsync_random_leader.test.lua
new file mode 100644
index 000000000..d84366916
--- /dev/null
+++ b/test/replication/qsync_random_leader.test.lua
@@ -0,0 +1,76 @@
+os = require('os')
+env = require('test_run')
+math = require('math')
+fiber = require('fiber')
+test_run = env.new()
+netbox = require('net.box')
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+
+NUM_INSTANCES = 5
+SERVERS = {}
+for i=1,NUM_INSTANCES do \
+ SERVERS[i] = 'qsync' .. i \
+end;
+SERVERS -- print instance names
+
+math.randomseed(os.time())
+random = function(excluded_num, total) \
+ local r = math.random(1, total) \
+ if (r == excluded_num) then \
+ return random(excluded_num, total) \
+ end \
+ return r \
+end
+
+-- Set 'broken' quorum on current leader.
+-- Write value on current leader.
+-- Pick a random replica in a cluster.
+-- Set 'good' quorum on it and promote to a leader.
+-- Make sure value is there and on an old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+test_run:switch('qsync1')
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+_ = box.space.sync:create_index('primary')
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+test_run:switch('default')
+current_leader_id = 1
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+
+SOCKET_DIR = require('fio').cwd()
+
+-- Testcase body.
+for i=1,30 do \
+ test_run:eval(SERVERS[current_leader_id], \
+ "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+ c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock') \
+ fiber.create(function() c.space.sync:insert{i} end) \
+ new_leader_id = random(current_leader_id, #SERVERS) \
+ test_run:eval(SERVERS[new_leader_id], \
+ "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+ test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()") \
+ c:close() \
+ replica = random(new_leader_id, #SERVERS) \
+ test_run:wait_cond(function() return test_run:eval(SERVERS[replica], \
+ string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+ test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+ string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+ new_leader_id = random(current_leader_id, #SERVERS) \
+ current_leader_id = new_leader_id \
+end
+
+test_run:wait_cond(function() return test_run:eval('qsync1', \
+ ("box.space.sync:count()")) == 30 end)
+
+-- Teardown.
+test_run:switch('default')
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+test_run:drop_cluster(SERVERS)
+box.cfg{ \
+ replication_synchro_quorum = orig_synchro_quorum, \
+ replication_synchro_timeout = orig_synchro_timeout, \
+}
--
2.25.1
More information about the Tarantool-patches
mailing list