From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp56.i.mail.ru (smtp56.i.mail.ru [217.69.128.36]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 04166469719 for ; Mon, 16 Nov 2020 12:10:51 +0300 (MSK) References: <87cbacc2bbcb903aced7b8f42742eac14c9431ad.1605170394.git.sergeyb@tarantool.org> <20f04abf-588e-844a-24ed-d70ce5024e58@tarantool.org> From: Sergey Bronnikov Message-ID: <6db1b311-c989-6a1d-52fe-ca5b78552a75@tarantool.org> Date: Mon, 16 Nov 2020 12:10:50 +0300 MIME-Version: 1.0 In-Reply-To: <20f04abf-588e-844a-24ed-d70ce5024e58@tarantool.org> Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-US Subject: Re: [Tarantool-patches] [PATCH 2/3 v2] replication: add test with random leaders promotion and demotion List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Serge Petrenko , tarantool-patches@dev.tarantool.org, v.shpilevoy@tarantool.org Hi, Serge! Thanks for the review. I've updated the patch; see the diffs below. On 13.11.2020 18:10, Serge Petrenko wrote: > > 12.11.2020 12:10, sergeyb@tarantool.org пишет: >> From: Sergey Bronnikov >> >> Part of #5055 >> Part of #5144 > > > Hi! Thanks for the patch! 
> > >> --- >>   test/replication/qsync.lua                    |  30 ++++ >>   test/replication/qsync1.lua                   |   1 + >>   test/replication/qsync2.lua                   |   1 + >>   test/replication/qsync3.lua                   |   1 + >>   test/replication/qsync4.lua                   |   1 + >>   test/replication/qsync5.lua                   |   1 + >>   test/replication/qsync_random_leader.result   | 139 ++++++++++++++++++ >>   test/replication/qsync_random_leader.test.lua |  68 +++++++++ >>   8 files changed, 242 insertions(+) >>   create mode 100644 test/replication/qsync.lua >>   create mode 120000 test/replication/qsync1.lua >>   create mode 120000 test/replication/qsync2.lua >>   create mode 120000 test/replication/qsync3.lua >>   create mode 120000 test/replication/qsync4.lua >>   create mode 120000 test/replication/qsync5.lua >>   create mode 100644 test/replication/qsync_random_leader.result >>   create mode 100644 test/replication/qsync_random_leader.test.lua >> >> diff --git a/test/replication/qsync.lua b/test/replication/qsync.lua >> new file mode 100644 >> index 000000000..b15cc18c9 >> --- /dev/null >> +++ b/test/replication/qsync.lua >> @@ -0,0 +1,30 @@ >> +#!/usr/bin/env tarantool >> + >> +-- get instance name from filename (qsync1.lua => qsync1) >> +local INSTANCE_ID = string.match(arg[0], "%d") >> + >> +local SOCKET_DIR = require('fio').cwd() >> + >> +local function instance_uri(instance_id) >> +    return SOCKET_DIR..'/qsync'..instance_id..'.sock'; >> +end >> + >> +-- start console first >> +require('console').listen(os.getenv('ADMIN')) >> + >> +box.cfg({ >> +    listen = instance_uri(INSTANCE_ID); >> +    replication = { >> +        instance_uri(1); >> +        instance_uri(2); >> +        instance_uri(3); >> +        instance_uri(4); >> +        instance_uri(5); >> +    }; >> +}) >> + >> +box.once("bootstrap", function() >> +    box.cfg{replication_synchro_timeout = 1000, >> replication_synchro_quorum = 5} >> +    
box.cfg{read_only = false} > > You may move these box.cfg calls to the initial box.cfg above. Agree. Fixed it with patch below: --- a/test/replication/qsync.lua +++ b/test/replication/qsync.lua @@ -21,10 +21,11 @@ box.cfg({          instance_uri(4);          instance_uri(5);      }; +    replication_synchro_timeout = 1000; +    replication_synchro_quorum = 5; +    read_only = false;  })  box.once("bootstrap", function() -    box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 5} -    box.cfg{read_only = false}      box.schema.user.grant("guest", 'replication')  end) > >> +    box.schema.user.grant("guest", 'replication') >> +end) >> diff --git a/test/replication/qsync1.lua b/test/replication/qsync1.lua >> new file mode 120000 >> index 000000000..df9f3a883 >> --- /dev/null >> +++ b/test/replication/qsync1.lua >> @@ -0,0 +1 @@ >> +qsync.lua >> \ No newline at end of file >> diff --git a/test/replication/qsync2.lua b/test/replication/qsync2.lua >> new file mode 120000 >> index 000000000..df9f3a883 >> --- /dev/null >> +++ b/test/replication/qsync2.lua >> @@ -0,0 +1 @@ >> +qsync.lua >> \ No newline at end of file > > >> + >> +-- Testcase body. 
>> +for i=1,300 do \ >> + test_run:eval(SERVERS[current_leader_id], \ >> +        string.format("box.space.sync:insert{%d}", >> i))                         \ >> +    new_leader_id = random(current_leader_id, >> #SERVERS)                        \ >> +    log.info(string.format("current leader id %d, new leader id >> %d",           \ >> +                           current_leader_id, >> new_leader_id))                  \ >> +    test_run:eval(SERVERS[new_leader_id], >> "box.ctl.clear_synchro_queue()")     \ >> +    replica = random(new_leader_id, >> #SERVERS)                                  \ >> +    test_run:wait_cond(function() return >> test_run:eval(SERVERS[replica],       \ >> +                       string.format("box.space.sync:get{%d}", i)) >> ~= nil end) \ >> +    test_run:wait_cond(function() return >> test_run:eval(SERVERS[current_leader_id], \ >> +                       string.format("box.space.sync:get{%d}", i)) >> ~= nil end) \ >> +    current_leader_id = >> new_leader_id                                          \ >> +end > > > Discussed verbally. Please update the testcase, so that insertions > are done with too high quorum. Updated test and added case with 'broken' quorum. Result file has been updated too. Number of test iterations has been reduced from 300 to 200. @@ -3,7 +3,7 @@  math = require('math')  fiber = require('fiber')  test_run = env.new() -log = require('log') +netbox = require('net.box')  orig_synchro_quorum = box.cfg.replication_synchro_quorum  orig_synchro_timeout = box.cfg.replication_synchro_timeout @@ -24,10 +24,11 @@      return r \  end +-- Set 'broken' quorum on current leader.  -- Write value on current leader.  -- Pick a random replica in a cluster. --- Promote replica to leader. --- Make sure value is there. +-- Set 'good' quorum on it and promote to a leader. +-- Make sure value is there and on an old leader.  -- Testcase setup.  
test_run:create_cluster(SERVERS) @@ -35,28 +36,35 @@  test_run:switch('qsync1')  _ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})  _ = box.space.sync:create_index('primary') +box.schema.user.grant('guest', 'write', 'space', 'sync')  test_run:switch('default')  current_leader_id = 1  test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()") +SOCKET_DIR = require('fio').cwd() +  -- Testcase body. -for i=1,300 do \ +for i=1,200 do \ test_run:eval(SERVERS[current_leader_id], \ -        string.format("box.space.sync:insert{%d}", i))                         \ +        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \ +    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock') \ +    fiber.create(function() c.space.sync:insert{i} end)                        \      new_leader_id = random(current_leader_id, #SERVERS)                        \ -    log.info(string.format("current leader id %d, new leader id %d",           \ -                           current_leader_id, new_leader_id))                  \ + test_run:eval(SERVERS[new_leader_id], \ +        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \      test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()")     \ + c:close() \      replica = random(new_leader_id, #SERVERS)                                  \      test_run:wait_cond(function() return test_run:eval(SERVERS[replica],       \ -                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \ +                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \      test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \ -                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \ +                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end)  \ +    new_leader_id = random(current_leader_id, #SERVERS)                       
 \      current_leader_id = new_leader_id                                          \  end  test_run:switch('qsync1') -box.space.sync:count() -- 300 +box.space.sync:count() -- 200  -- Teardown.  test_run:switch('default') > > >> + >> +test_run:switch('qsync1') >> +box.space.sync:count() -- 300 >> + >> +-- Teardown. >> +test_run:switch('default') >> +test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()') >> +test_run:drop_cluster(SERVERS) >> +box.cfg{ \ >> +    replication_synchro_quorum = >> orig_synchro_quorum,                          \ >> +    replication_synchro_timeout = >> orig_synchro_timeout,                        \ >> +} >