From mboxrd@z Thu Jan  1 00:00:00 1970
From: sergeyb@tarantool.org
Date: Tue, 17 Nov 2020 19:13:53 +0300
Message-Id: <9eb76fb56017daca32e3fbc5b5415fc144eb0699.1605629206.git.sergeyb@tarantool.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Subject: [Tarantool-patches] [PATCH 3/4] replication: add test with random
 leaders promotion and demotion
List-Id: Tarantool development patches
To: tarantool-patches@dev.tarantool.org, v.shpilevoy@tarantool.org

From: Sergey Bronnikov <sergeyb@tarantool.org>

Add a test which randomly promotes and demotes a leader in a cluster of
five instances sharing a synchronous space. On every iteration the
current leader is given an unreachable quorum, so a synchronous insert
on it hangs; a randomly chosen replica is then given a reachable quorum
and clears the synchro queue, after which the test waits until the row
shows up both on a random replica and on the old leader.
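The quorum interplay at the core of each iteration, reduced to the box
calls the loop runs on the two instances (a sketch for illustration
only: the two halves run on different servers, and `i` comes from the
loop in the test below):

    local fiber = require('fiber')

    -- On the current leader: a quorum of 6 out of 5 instances is
    -- unreachable, so the synchronous insert blocks in a fiber,
    -- waiting for confirmations.
    box.cfg{replication_synchro_quorum = 6, replication_synchro_timeout = 1000}
    fiber.create(function() box.space.sync:insert{i} end)

    -- On the promoted replica: a quorum of 3 out of 5 is reachable,
    -- and clearing the synchro queue lets the pending entry be
    -- confirmed.
    box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 0.01}
    box.ctl.clear_synchro_queue()

The test is supposed to be run through the test-run harness, e.g.
`cd test && ./test-run.py replication/qsync_random_leader.test.lua`.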
Part of #5055
Part of #5144
---
 test/replication/qsync.lua                    |  31 +++++
 test/replication/qsync1.lua                   |   1 +
 test/replication/qsync2.lua                   |   1 +
 test/replication/qsync3.lua                   |   1 +
 test/replication/qsync4.lua                   |   1 +
 test/replication/qsync5.lua                   |   1 +
 test/replication/qsync_random_leader.result   | 147 ++++++++++++++++++
 test/replication/qsync_random_leader.test.lua |  75 +++++++++
 8 files changed, 258 insertions(+)
 create mode 100644 test/replication/qsync.lua
 create mode 120000 test/replication/qsync1.lua
 create mode 120000 test/replication/qsync2.lua
 create mode 120000 test/replication/qsync3.lua
 create mode 120000 test/replication/qsync4.lua
 create mode 120000 test/replication/qsync5.lua
 create mode 100644 test/replication/qsync_random_leader.result
 create mode 100644 test/replication/qsync_random_leader.test.lua

diff --git a/test/replication/qsync.lua b/test/replication/qsync.lua
new file mode 100644
index 000000000..9bbc87239
--- /dev/null
+++ b/test/replication/qsync.lua
@@ -0,0 +1,31 @@
+#!/usr/bin/env tarantool
+
+-- get instance id from filename (qsync1.lua => 1)
+local INSTANCE_ID = string.match(arg[0], "%d")
+
+local SOCKET_DIR = require('fio').cwd()
+
+local function instance_uri(instance_id)
+    return SOCKET_DIR..'/qsync'..instance_id..'.sock';
+end
+
+-- start console first
+require('console').listen(os.getenv('ADMIN'))
+
+box.cfg({
+    listen = instance_uri(INSTANCE_ID);
+    replication = {
+        instance_uri(1);
+        instance_uri(2);
+        instance_uri(3);
+        instance_uri(4);
+        instance_uri(5);
+    };
+    replication_synchro_timeout = 1000;
+    replication_synchro_quorum = 5;
+    read_only = false;
+})
+
+box.once("bootstrap", function()
+    box.schema.user.grant("guest", 'replication')
+end)
diff --git a/test/replication/qsync1.lua b/test/replication/qsync1.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync1.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync2.lua b/test/replication/qsync2.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync2.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync3.lua b/test/replication/qsync3.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync3.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync4.lua b/test/replication/qsync4.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync4.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync5.lua b/test/replication/qsync5.lua
new file mode 120000
index 000000000..df9f3a883
--- /dev/null
+++ b/test/replication/qsync5.lua
@@ -0,0 +1 @@
+qsync.lua
\ No newline at end of file
diff --git a/test/replication/qsync_random_leader.result b/test/replication/qsync_random_leader.result
new file mode 100644
index 000000000..2b2df99db
--- /dev/null
+++ b/test/replication/qsync_random_leader.result
@@ -0,0 +1,147 @@
+-- test-run result file version 2
+os = require('os')
+ | ---
+ | ...
+env = require('test_run')
+ | ---
+ | ...
+math = require('math')
+ | ---
+ | ...
+fiber = require('fiber')
+ | ---
+ | ...
+test_run = env.new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+ | ---
+ | ...
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+ | ---
+ | ...
+
+NUM_INSTANCES = 5
+ | ---
+ | ...
+SERVERS = {}
+ | ---
+ | ...
+for i=1,NUM_INSTANCES do \
+    SERVERS[i] = 'qsync' .. i \
+end;
+ | ---
+ | ...
+SERVERS -- print instance names
+ | ---
+ | - - qsync1
+ |   - qsync2
+ |   - qsync3
+ |   - qsync4
+ |   - qsync5
+ | ...
+
+math.randomseed(os.time())
+ | ---
+ | ...
+random = function(excluded_num, total) \
+    local r = math.random(1, total) \
+    if (r == excluded_num) then \
+        return random(excluded_num, total) \
+    end \
+    return r \
+end
+ | ---
+ | ...
+
+-- Set 'broken' quorum on the current leader.
+-- Write a value on the current leader.
+-- Pick a random replica in the cluster.
+-- Set 'good' quorum on it and promote it to leader.
+-- Make sure the value is there and on the old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+test_run:switch('qsync1')
+ | ---
+ | - true
+ | ...
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+ | ---
+ | ...
+_ = box.space.sync:create_index('primary')
+ | ---
+ | ...
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+ | ---
+ | ...
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+current_leader_id = 1
+ | ---
+ | ...
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+ | ---
+ | - []
+ | ...
+
+SOCKET_DIR = require('fio').cwd()
+ | ---
+ | ...
+
+-- Testcase body.
+for i=1,30 do \
+    test_run:eval(SERVERS[current_leader_id], \
+        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock') \
+    fiber.create(function() c.space.sync:insert{i} end) \
+    new_leader_id = random(current_leader_id, #SERVERS) \
+    test_run:eval(SERVERS[new_leader_id], \
+        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+    test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()") \
+    c:close() \
+    replica = random(new_leader_id, #SERVERS) \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[replica], \
+        string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+        string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+    current_leader_id = new_leader_id \
+end
+ | ---
+ | ...
+
+test_run:wait_cond(function() return test_run:eval('qsync1', \
+    "box.space.sync:count()")[1] == 30 end)
+ | ---
+ | - true
+ | ...
+
+-- Teardown.
+test_run:switch('default')
+ | ---
+ | - true
+ | ...
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+ | ---
+ | - []
+ | ...
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
+box.cfg{ \
+    replication_synchro_quorum = orig_synchro_quorum, \
+    replication_synchro_timeout = orig_synchro_timeout, \
+}
+ | ---
+ | ...
diff --git a/test/replication/qsync_random_leader.test.lua b/test/replication/qsync_random_leader.test.lua
new file mode 100644
index 000000000..d84366916
--- /dev/null
+++ b/test/replication/qsync_random_leader.test.lua
@@ -0,0 +1,75 @@
+os = require('os')
+env = require('test_run')
+math = require('math')
+fiber = require('fiber')
+test_run = env.new()
+netbox = require('net.box')
+
+orig_synchro_quorum = box.cfg.replication_synchro_quorum
+orig_synchro_timeout = box.cfg.replication_synchro_timeout
+
+NUM_INSTANCES = 5
+SERVERS = {}
+for i=1,NUM_INSTANCES do \
+    SERVERS[i] = 'qsync' .. i \
+end;
+SERVERS -- print instance names
+
+math.randomseed(os.time())
+random = function(excluded_num, total) \
+    local r = math.random(1, total) \
+    if (r == excluded_num) then \
+        return random(excluded_num, total) \
+    end \
+    return r \
+end
+
+-- Set 'broken' quorum on the current leader.
+-- Write a value on the current leader.
+-- Pick a random replica in the cluster.
+-- Set 'good' quorum on it and promote it to leader.
+-- Make sure the value is there and on the old leader.
+
+-- Testcase setup.
+test_run:create_cluster(SERVERS)
+test_run:wait_fullmesh(SERVERS)
+test_run:switch('qsync1')
+_ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
+_ = box.space.sync:create_index('primary')
+box.schema.user.grant('guest', 'write', 'space', 'sync')
+test_run:switch('default')
+current_leader_id = 1
+test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")
+
+SOCKET_DIR = require('fio').cwd()
+
+-- Testcase body.
+for i=1,30 do \
+    test_run:eval(SERVERS[current_leader_id], \
+        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock') \
+    fiber.create(function() c.space.sync:insert{i} end) \
+    new_leader_id = random(current_leader_id, #SERVERS) \
+    test_run:eval(SERVERS[new_leader_id], \
+        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
+    test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()") \
+    c:close() \
+    replica = random(new_leader_id, #SERVERS) \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[replica], \
+        string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+    test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
+        string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+    current_leader_id = new_leader_id \
+end
+
+test_run:wait_cond(function() return test_run:eval('qsync1', \
+    "box.space.sync:count()")[1] == 30 end)
+
+-- Teardown.
+test_run:switch('default')
+test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
+test_run:drop_cluster(SERVERS)
+box.cfg{ \
+    replication_synchro_quorum = orig_synchro_quorum, \
+    replication_synchro_timeout = orig_synchro_timeout, \
+}
-- 
2.25.1