[Tarantool-patches] [PATCH 2/3 v2] replication: add test with random leaders promotion and demotion

Sergey Bronnikov sergeyb at tarantool.org
Mon Nov 16 12:10:50 MSK 2020


Hi, Serge!

Thanks for the review. I've updated the patch; see the diffs below.


On 13.11.2020 18:10, Serge Petrenko wrote:
>
> 12.11.2020 12:10, sergeyb at tarantool.org wrote:
>> From: Sergey Bronnikov<sergeyb at tarantool.org>
>>
>> Part of #5055
>> Part of #5144
>
>
> Hi! Thanks for the patch!
>
>
>> ---
>>   test/replication/qsync.lua                    |  30 ++++
>>   test/replication/qsync1.lua                   |   1 +
>>   test/replication/qsync2.lua                   |   1 +
>>   test/replication/qsync3.lua                   |   1 +
>>   test/replication/qsync4.lua                   |   1 +
>>   test/replication/qsync5.lua                   |   1 +
>>   test/replication/qsync_random_leader.result   | 139 ++++++++++++++++++
>>   test/replication/qsync_random_leader.test.lua |  68 +++++++++
>>   8 files changed, 242 insertions(+)
>>   create mode 100644 test/replication/qsync.lua
>>   create mode 120000 test/replication/qsync1.lua
>>   create mode 120000 test/replication/qsync2.lua
>>   create mode 120000 test/replication/qsync3.lua
>>   create mode 120000 test/replication/qsync4.lua
>>   create mode 120000 test/replication/qsync5.lua
>>   create mode 100644 test/replication/qsync_random_leader.result
>>   create mode 100644 test/replication/qsync_random_leader.test.lua
>>
>> diff --git a/test/replication/qsync.lua b/test/replication/qsync.lua
>> new file mode 100644
>> index 000000000..b15cc18c9
>> --- /dev/null
>> +++ b/test/replication/qsync.lua
>> @@ -0,0 +1,30 @@
>> +#!/usr/bin/env tarantool
>> +
>> +-- get instance name from filename (qsync1.lua => qsync1)
>> +local INSTANCE_ID = string.match(arg[0], "%d")
>> +
>> +local SOCKET_DIR = require('fio').cwd()
>> +
>> +local function instance_uri(instance_id)
>> +    return SOCKET_DIR..'/qsync'..instance_id..'.sock';
>> +end
>> +
>> +-- start console first
>> +require('console').listen(os.getenv('ADMIN'))
>> +
>> +box.cfg({
>> +    listen = instance_uri(INSTANCE_ID);
>> +    replication = {
>> +        instance_uri(1);
>> +        instance_uri(2);
>> +        instance_uri(3);
>> +        instance_uri(4);
>> +        instance_uri(5);
>> +    };
>> +})
>> +
>> +box.once("bootstrap", function()
>> +    box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 5}
>> +    box.cfg{read_only = false}
>
> You may move these box.cfg calls to the initial box.cfg above.


Agreed. Fixed it with the patch below:

--- a/test/replication/qsync.lua
+++ b/test/replication/qsync.lua
@@ -21,10 +21,11 @@ box.cfg({
          instance_uri(4);
          instance_uri(5);
      };
+    replication_synchro_timeout = 1000;
+    replication_synchro_quorum = 5;
+    read_only = false;
  })

  box.once("bootstrap", function()
-    box.cfg{replication_synchro_timeout = 1000, replication_synchro_quorum = 5}
-    box.cfg{read_only = false}
      box.schema.user.grant("guest", 'replication')
  end)

>
>> +    box.schema.user.grant("guest", 'replication')
>> +end)
>> diff --git a/test/replication/qsync1.lua b/test/replication/qsync1.lua
>> new file mode 120000
>> index 000000000..df9f3a883
>> --- /dev/null
>> +++ b/test/replication/qsync1.lua
>> @@ -0,0 +1 @@
>> +qsync.lua
>> \ No newline at end of file
>> diff --git a/test/replication/qsync2.lua b/test/replication/qsync2.lua
>> new file mode 120000
>> index 000000000..df9f3a883
>> --- /dev/null
>> +++ b/test/replication/qsync2.lua
>> @@ -0,0 +1 @@
>> +qsync.lua
>> \ No newline at end of file
>
>
>> +
>> +-- Testcase body.
>> +for i=1,300 do                                                              \
>> +    test_run:eval(SERVERS[current_leader_id],                               \
>> +        string.format("box.space.sync:insert{%d}", i))                      \
>> +    new_leader_id = random(current_leader_id, #SERVERS)                     \
>> +    log.info(string.format("current leader id %d, new leader id %d",        \
>> +                           current_leader_id, new_leader_id))               \
>> +    test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()")  \
>> +    replica = random(new_leader_id, #SERVERS)                               \
>> +    test_run:wait_cond(function() return test_run:eval(SERVERS[replica],    \
>> +                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \
>> +    test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
>> +                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \
>> +    current_leader_id = new_leader_id                                       \
>> +end
>
>
> Discussed verbally. Please update the testcase so that insertions
> are done with a too-high quorum.

Updated the test and added a case with a 'broken' quorum. The result
file has been updated too.

The number of test iterations has been reduced from 300 to 200.
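The idea of the 'broken' quorum case is to wedge a synchronous insert
on the old leader and then let the new leader confirm it. A minimal
sketch of the mechanism (assuming a 5-instance cluster and an existing
synchronous space 'sync'; this is not the literal test code, which
drives the instances via test_run and net.box, as in the diff below):

    -- On the old leader: quorum 6 is unreachable in a 5-instance
    -- cluster, so the synchronous insert cannot gather enough acks
    -- and blocks in a background fiber instead of committing.
    fiber = require('fiber')
    box.cfg{replication_synchro_quorum = 6, replication_synchro_timeout = 1000}
    fiber.create(function() box.space.sync:insert{1} end)

    -- On the new leader: lower the quorum to a reachable value and
    -- take over the synchro queue, so the pending insert can be
    -- confirmed and become visible on the replicas.
    box.cfg{replication_synchro_quorum = 3, replication_synchro_timeout = 0.01}
    box.ctl.clear_synchro_queue()

In the test itself the blocked insert is issued from the 'default'
instance through a net.box connection inside a fiber, because a plain
test_run:eval() would block the test until the insert returns.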

@@ -3,7 +3,7 @@
 math = require('math')
 fiber = require('fiber')
 test_run = env.new()
-log = require('log')
+netbox = require('net.box')

 orig_synchro_quorum = box.cfg.replication_synchro_quorum
 orig_synchro_timeout = box.cfg.replication_synchro_timeout
@@ -24,10 +24,11 @@
     return r \
 end

+-- Set 'broken' quorum on current leader.
 -- Write value on current leader.
 -- Pick a random replica in a cluster.
--- Promote replica to leader.
--- Make sure value is there.
+-- Set 'good' quorum on it and promote to a leader.
+-- Make sure value is there and on an old leader.

 -- Testcase setup.
 test_run:create_cluster(SERVERS)
@@ -35,28 +36,35 @@
 test_run:switch('qsync1')
 _ = box.schema.space.create('sync', {is_sync=true, engine = test_run:get_cfg('engine')})
 _ = box.space.sync:create_index('primary')
+box.schema.user.grant('guest', 'write', 'space', 'sync')

 test_run:switch('default')
 current_leader_id = 1
 test_run:eval(SERVERS[current_leader_id], "box.ctl.clear_synchro_queue()")

+SOCKET_DIR = require('fio').cwd()
+
 -- Testcase body.
-for i=1,300 do                                                                \
+for i=1,200 do                                                                \
     test_run:eval(SERVERS[current_leader_id],                                 \
-        string.format("box.space.sync:insert{%d}", i))                        \
+        "box.cfg{replication_synchro_quorum=6, replication_synchro_timeout=1000}") \
+    c = netbox.connect(SOCKET_DIR..'/'..SERVERS[current_leader_id]..'.sock')  \
+    fiber.create(function() c.space.sync:insert{i} end)                       \
     new_leader_id = random(current_leader_id, #SERVERS)                       \
-    log.info(string.format("current leader id %d, new leader id %d",          \
-                           current_leader_id, new_leader_id))                 \
+    test_run:eval(SERVERS[new_leader_id],                                     \
+        "box.cfg{replication_synchro_quorum=3, replication_synchro_timeout=0.01}") \
     test_run:eval(SERVERS[new_leader_id], "box.ctl.clear_synchro_queue()")    \
+    c:close()                                                                 \
     replica = random(new_leader_id, #SERVERS)                                 \
     test_run:wait_cond(function() return test_run:eval(SERVERS[replica],      \
-                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
     test_run:wait_cond(function() return test_run:eval(SERVERS[current_leader_id], \
-                       string.format("box.space.sync:get{%d}", i)) ~= nil end) \
+                       string.format("box.space.sync:get{%d}", i))[1] ~= nil end) \
+    new_leader_id = random(current_leader_id, #SERVERS)                       \
     current_leader_id = new_leader_id                                         \
 end

 test_run:switch('qsync1')
-box.space.sync:count() -- 300
+box.space.sync:count() -- 200

 -- Teardown.
 test_run:switch('default')

>
>
>> +
>> +test_run:switch('qsync1')
>> +box.space.sync:count() -- 300
>> +
>> +-- Teardown.
>> +test_run:switch('default')
>> +test_run:eval(SERVERS[current_leader_id], 'box.space.sync:drop()')
>> +test_run:drop_cluster(SERVERS)
>> +box.cfg{                                                                    \
>> +    replication_synchro_quorum = orig_synchro_quorum,                       \
>> +    replication_synchro_timeout = orig_synchro_timeout,                     \
>> +}
>

