Tarantool development patches archive
* [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
@ 2020-10-02 10:33 Serge Petrenko
  2020-10-04 13:54 ` Vladislav Shpilevoy
  2020-10-06 10:04 ` Kirill Yukhin
  0 siblings, 2 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-10-02 10:33 UTC (permalink / raw)
  To: v.shpilevoy; +Cc: tarantool-patches

---
Branch: https://github.com/tarantool/tarantool/tree/sp/raft-qsync-test

The test is relatively long (it runs for 10 seconds on my machine), but I still
think it's worth having, at least under the --long option (I haven't added it
to the long_run list yet).

 test/replication/election_qsync.result   | 125 +++++++++++++++++++++++
 test/replication/election_qsync.test.lua |  70 +++++++++++++
 test/replication/election_replica.lua    |  10 +-
 3 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 test/replication/election_qsync.result
 create mode 100644 test/replication/election_qsync.test.lua

diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
new file mode 100644
index 000000000..1bf13d7bc
--- /dev/null
+++ b/test/replication/election_qsync.result
@@ -0,0 +1,125 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+netbox = require('net.box')
+ | ---
+ | ...
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+ | ---
+ | ...
+
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+ | ---
+ | ...
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+ | ---
+ | ...
+test_run:wait_fullmesh(SERVERS)
+ | ---
+ | ...
+
+nrs = {true, true, true}
+ | ---
+ | ...
+old_leader_nr = get_leader(nrs)
+ | ---
+ | ...
+old_leader = 'election_replica'..old_leader_nr
+ | ---
+ | ...
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+ | ---
+ | ...
+c = netbox.connect(leader_port)
+ | ---
+ | ...
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+ | ---
+ | ...
+_ = c:eval('box.space.test:create_index("pk")')
+ | ---
+ | ...
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+ | ---
+ | - true
+ | ...
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+ | ---
+ | ...
+test_run:cmd('setopt delimiter ""');
+ | ---
+ | - true
+ | ...
+-- We're connected to some leader.
+c.space.test:select{}
+ | ---
+ | - - [1]
+ |   - [2]
+ |   - [3]
+ |   - [4]
+ |   - [5]
+ |   - [6]
+ |   - [7]
+ |   - [8]
+ |   - [9]
+ |   - [10]
+ | ...
+
+test_run:drop_cluster(SERVERS)
+ | ---
+ | ...
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
new file mode 100644
index 000000000..f069c71bb
--- /dev/null
+++ b/test/replication/election_qsync.test.lua
@@ -0,0 +1,70 @@
+test_run = require('test_run').new()
+netbox = require('net.box')
+
+--
+-- gh-1146: Leader election + Qsync
+--
+test_run:cmd('setopt delimiter ";"')
+function get_leader(nrs)
+    local is_leader_cmd = 'return box.info.election.state == \'leader\''
+    local leader_nr = 0
+    test_run:wait_cond(function()
+        local leader_count = 0
+        for nr, do_check in pairs(nrs) do
+            if do_check then
+                local is_leader = test_run:eval('election_replica'..nr,
+                                                is_leader_cmd)[1]
+                if is_leader then
+                    leader_count = leader_count + 1
+                    leader_nr = nr
+                end
+                assert(leader_count <= 1)
+            end
+        end
+        return leader_count == 1
+    end)
+    return leader_nr
+end;
+
+test_run:cmd('setopt delimiter ""');
+
+SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
+test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:wait_fullmesh(SERVERS)
+
+nrs = {true, true, true}
+old_leader_nr = get_leader(nrs)
+old_leader = 'election_replica'..old_leader_nr
+leader_port = test_run:eval(old_leader, 'box.cfg.listen')[1]
+c = netbox.connect(leader_port)
+
+_ = c:eval('box.schema.space.create("test", {is_sync=true})')
+_ = c:eval('box.space.test:create_index("pk")')
+
+-- Insert some data to a synchronous space, then kill the leader before the
+-- confirmation is written. Check successful confirmation on the new leader.
+test_run:cmd('setopt delimiter ";"')
+for i = 1,10 do
+    c:eval('box.cfg{replication_synchro_quorum=4, replication_synchro_timeout=1000}')
+    c.space.test:insert({i}, {is_async=true})
+    test_run:wait_cond(function() return c.space.test:get{i} ~= nil end)
+    test_run:cmd('stop server '..old_leader)
+    nrs[old_leader_nr] = false
+    new_leader_nr = get_leader(nrs)
+    new_leader = 'election_replica'..new_leader_nr
+    leader_port = test_run:eval(new_leader, 'box.cfg.listen')[1]
+    c = netbox.connect(leader_port)
+    c:eval('box.ctl.clear_synchro_queue()')
+    c:eval('box.cfg{replication_synchro_timeout=1000}')
+    c.space._schema:replace{'smth'}
+    c.space.test:get{i}
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    nrs[old_leader_nr] = true
+    old_leader_nr = new_leader_nr
+    old_leader = new_leader
+end;
+test_run:cmd('setopt delimiter ""');
+-- We're connected to some leader.
+c.space.test:select{}
+
+test_run:drop_cluster(SERVERS)
diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
index 36ea1f077..887d8a2a0 100644
--- a/test/replication/election_replica.lua
+++ b/test/replication/election_replica.lua
@@ -2,9 +2,10 @@
 
 local INSTANCE_ID = string.match(arg[0], "%d")
 local SOCKET_DIR = require('fio').cwd()
+local SYNCHRO_QUORUM = arg[1] and tonumber(arg[1]) or 3
 
 local function instance_uri(instance_id)
-    return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock';
+    return SOCKET_DIR..'/election_replica'..instance_id..'.sock';
 end
 
 require('console').listen(os.getenv('ADMIN'))
@@ -19,8 +20,11 @@ box.cfg({
     replication_timeout = 0.1,
     election_is_enabled = true,
     election_is_candidate = true,
-    election_timeout = 0.1,
-    replication_synchro_quorum = 3,
+    -- Should be at least as big as replication_disconnect_timeout, which is
+    -- 4 * replication_timeout.
+    election_timeout = 0.4,
+    replication_synchro_quorum = SYNCHRO_QUORUM,
+    replication_synchro_timeout = 0.1,
     -- To reveal more election logs.
     log_level = 6,
 })
-- 
2.24.3 (Apple Git-128)


* Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
  2020-10-02 10:33 [Tarantool-patches] [PATCH] raft: add a test with synchronous replication Serge Petrenko
@ 2020-10-04 13:54 ` Vladislav Shpilevoy
  2020-10-05  8:52   ` Serge Petrenko
  2020-10-06 10:04 ` Kirill Yukhin
  1 sibling, 1 reply; 6+ messages in thread
From: Vladislav Shpilevoy @ 2020-10-04 13:54 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches

Hi! Thanks for the patch!

> diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
> new file mode 100644
> index 000000000..1bf13d7bc
> --- /dev/null
> +++ b/test/replication/election_qsync.result
> @@ -0,0 +1,125 @@
> +SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
> + | ---
> + | ...
> +test_run:create_cluster(SERVERS, "replication", {args='2'})
> + | ---
> + | ...
> +test_run:wait_fullmesh(SERVERS)
> + | ---
> + | ...
> +
> +nrs = {true, true, true}

1. What is 'nrs'?

> + | ---
> + | ...
> diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
> index 36ea1f077..887d8a2a0 100644
> --- a/test/replication/election_replica.lua
> +++ b/test/replication/election_replica.lua
> @@ -19,8 +20,11 @@ box.cfg({
>      replication_timeout = 0.1,
>      election_is_enabled = true,
>      election_is_candidate = true,
> -    election_timeout = 0.1,
> -    replication_synchro_quorum = 3,
> +    -- Should be at least as big as replication_disconnect_timeout, which is
> +    -- 4 * replication_timeout.
> +    election_timeout = 0.4,

2. Why? Election timeout has nothing to do with disconnect. It is about
split vote. This also will slow down raft_basic.test.lua, which is not
supposed to be long. For heartbeat timeouts Raft already uses
replication_disconnect_timeout = replication_timeout * 4.
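
To make the relation concrete, a minimal illustrative sketch (the 0.1 values
are the ones used in the test config; the disconnect timeout is not a separate
option, it is derived as described above):

    box.cfg{
        replication_timeout = 0.1, -- heartbeat period between replicas
        election_timeout    = 0.1, -- retry period on a split vote
    }
    -- "Leader is dead" detection does not use election_timeout; Raft relies
    -- on the replication disconnect timeout, i.e. 4 * replication_timeout:
    local disconnect_timeout = 4 * box.cfg.replication_timeout -- 0.4 here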


* Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
  2020-10-04 13:54 ` Vladislav Shpilevoy
@ 2020-10-05  8:52   ` Serge Petrenko
  2020-10-05 21:40     ` Vladislav Shpilevoy
  0 siblings, 1 reply; 6+ messages in thread
From: Serge Petrenko @ 2020-10-05  8:52 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches


04.10.2020 16:54, Vladislav Shpilevoy wrote:
> Hi! Thanks for the patch!
Hi! Thanks for the review!
>
>> diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
>> new file mode 100644
>> index 000000000..1bf13d7bc
>> --- /dev/null
>> +++ b/test/replication/election_qsync.result
>> @@ -0,0 +1,125 @@
>> +SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
>> + | ---
>> + | ...
>> +test_run:create_cluster(SERVERS, "replication", {args='2'})
>> + | ---
>> + | ...
>> +test_run:wait_fullmesh(SERVERS)
>> + | ---
>> + | ...
>> +
>> +nrs = {true, true, true}
> 1. What is 'nrs'?

I'll add a comment. nrs is a parameter passed to get_leader().
nrs = {true, true, true} means check whether any of the 3 instances is a
leader. Once I kill the former leader, I set nrs[former_leader_nr] = false
and wait until one of the two remaining instances becomes a leader.

nrs is short for "numbers", I guess.
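
In other words, the intended usage around a leader kill is roughly this (a
sketch condensed from the test above, reusing its test_run and get_leader
helpers; not a separate runnable test):

    nrs = {true, true, true}          -- consider all three instances
    old_leader_nr = get_leader(nrs)   -- wait until exactly one is the leader

    test_run:cmd('stop server election_replica'..old_leader_nr)
    nrs[old_leader_nr] = false        -- the stopped instance can't be a leader
    new_leader_nr = get_leader(nrs)   -- wait for one of the two survivors

    test_run:cmd('start server election_replica'..old_leader_nr..
                 ' with wait=True, wait_load=True, args="2"')
    nrs[old_leader_nr] = true         -- it may become a leader again later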

>
>> + | ---
>> + | ...
>> diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
>> index 36ea1f077..887d8a2a0 100644
>> --- a/test/replication/election_replica.lua
>> +++ b/test/replication/election_replica.lua
>> @@ -19,8 +20,11 @@ box.cfg({
>>       replication_timeout = 0.1,
>>       election_is_enabled = true,
>>       election_is_candidate = true,
>> -    election_timeout = 0.1,
>> -    replication_synchro_quorum = 3,
>> +    -- Should be at least as big as replication_disconnect_timeout, which is
>> +    -- 4 * replication_timeout.
>> +    election_timeout = 0.4,
> 2. Why? Election timeout has nothing to do with disconnect. It is about
> split vote. This also will slow down raft_basic.test.lua, which is not
> supposed to be long. For heartbeat timeouts Raft already uses
> replication_disconnect_timeout = replication_timeout * 4.

I've seen cases when a leader is elected but doesn't send out the is_leader
flag in time, so new elections start over and over again. This only happened
when the tests were run in parallel, so the problem was probably the high
load.

So my logic was: if we wait 4 * replication_timeout for the leader to come
back, why not wait 4 * replication_timeout for the leader to establish its
leadership?

I mean, if it's considered a normal situation when a leader disappears for
not more than 4 * replication_timeout, and this doesn't trigger an election,
why should elections end before at least 4 * replication_timeout seconds
pass?

By the way, the raft paper doesn't have a separate leader disconnect timeout.
The same election timeout is used for this purpose. So that's another
argument for setting election_timeout to at least 4 * replication_timeout.

Speaking of raft_basic.test.lua becoming slow, let's pass election_timeout as
an argument to the replica, just like I do for replication_synchro_quorum.


Here are the changes:


diff --git a/test/replication/election_qsync.result b/test/replication/election_qsync.result
index 1bf13d7bc..9497b37bf 100644
--- a/test/replication/election_qsync.result
+++ b/test/replication/election_qsync.result
@@ -44,13 +44,16 @@ test_run:cmd('setopt delimiter ""');
 SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
  | ---
  | ...
-test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:create_cluster(SERVERS, "replication", {args='2 0.4'})
  | ---
  | ...
 test_run:wait_fullmesh(SERVERS)
  | ---
  | ...

+-- Any of the three instances may be the leader now.
+-- When the former leader is killed, we expect one of the two instances left
+-- to become a leader, so nrs[former_leader_nr] = false.
 nrs = {true, true, true}
  | ---
  | ...
@@ -94,7 +97,7 @@ for i = 1,10 do
     c:eval('box.cfg{replication_synchro_timeout=1000}')
     c.space._schema:replace{'smth'}
     c.space.test:get{i}
-    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
     nrs[old_leader_nr] = true
     old_leader_nr = new_leader_nr
     old_leader = new_leader
diff --git a/test/replication/election_qsync.test.lua b/test/replication/election_qsync.test.lua
index f069c71bb..bca1b20c7 100644
--- a/test/replication/election_qsync.test.lua
+++ b/test/replication/election_qsync.test.lua
@@ -29,9 +29,12 @@ end;
 test_run:cmd('setopt delimiter ""');

 SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'}
-test_run:create_cluster(SERVERS, "replication", {args='2'})
+test_run:create_cluster(SERVERS, "replication", {args='2 0.4'})
 test_run:wait_fullmesh(SERVERS)

+-- Any of the three instances may be the leader now.
+-- When the former leader is killed, we expect one of the two instances left
+-- to become a leader, so nrs[former_leader_nr] = false.
 nrs = {true, true, true}
 old_leader_nr = get_leader(nrs)
 old_leader = 'election_replica'..old_leader_nr
@@ -58,7 +61,7 @@ for i = 1,10 do
     c:eval('box.cfg{replication_synchro_timeout=1000}')
     c.space._schema:replace{'smth'}
     c.space.test:get{i}
-    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2"')
+    test_run:cmd('start server '..old_leader..' with wait=True, wait_load=True, args="2 0.4"')
     nrs[old_leader_nr] = true
     old_leader_nr = new_leader_nr
     old_leader = new_leader
diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
index 887d8a2a0..b7d1aebe7 100644
--- a/test/replication/election_replica.lua
+++ b/test/replication/election_replica.lua
@@ -3,6 +3,7 @@
 local INSTANCE_ID = string.match(arg[0], "%d")
 local SOCKET_DIR = require('fio').cwd()
 local SYNCHRO_QUORUM = arg[1] and tonumber(arg[1]) or 3
+local ELECTION_TIMEOUT = arg[2] and tonumber(arg[2]) or 0.1

 local function instance_uri(instance_id)
     return SOCKET_DIR..'/election_replica'..instance_id..'.sock';
@@ -20,9 +21,7 @@ box.cfg({
     replication_timeout = 0.1,
     election_is_enabled = true,
     election_is_candidate = true,
-    -- Should be at least as big as replication_disconnect_timeout, which is
-    -- 4 * replication_timeout.
-    election_timeout = 0.4,
+    election_timeout = ELECTION_TIMEOUT,
     replication_synchro_quorum = SYNCHRO_QUORUM,
     replication_synchro_timeout = 0.1,
     -- To reveal more election logs.


-- 
Serge Petrenko


* Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
  2020-10-05  8:52   ` Serge Petrenko
@ 2020-10-05 21:40     ` Vladislav Shpilevoy
  2020-10-06  7:30       ` Serge Petrenko
  0 siblings, 1 reply; 6+ messages in thread
From: Vladislav Shpilevoy @ 2020-10-05 21:40 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches

Hi! Thanks for the fixes!

>>> + | ---
>>> + | ...
>>> diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
>>> index 36ea1f077..887d8a2a0 100644
>>> --- a/test/replication/election_replica.lua
>>> +++ b/test/replication/election_replica.lua
>>> @@ -19,8 +20,11 @@ box.cfg({
>>>       replication_timeout = 0.1,
>>>       election_is_enabled = true,
>>>       election_is_candidate = true,
>>> -    election_timeout = 0.1,
>>> -    replication_synchro_quorum = 3,
>>> +    -- Should be at least as big as replication_disconnect_timeout, which is
>>> +    -- 4 * replication_timeout.
>>> +    election_timeout = 0.4,
>> 2. Why? Election timeout has nothing to do with disconnect. It is about
>> split vote. This also will slow down raft_basic.test.lua, which is not
>> supposed to be long. For heartbeat timeouts Raft already uses
>> replication_disconnect_timeout = replication_timeout * 4.
> 
> I've seen cases when a leader is elected, but doesn't send out the is_leader flag
> in time, so new elections start over and over again. This only happened when the
> tests were run in parallel, so the problem was probably in high load.

It should not be a problem. 100ms is enough to eventually elect a leader when the
instances run on the same machine. Several election attempts should not lead to a
test failure, and even 0.4 may still allow them; it is not a guaranteed protection.

> So, my logic was that if we wait for 4 times replication timeout for the leader to
> come back why not wait for 4 * replication timeout for the leader to establish
> its leadership.
> 
> I mean, if it's considered a normal situation when a leader disappears for not more
> than 4 * replication_timeout, and this doesn't trigger an election, why should
> elections end before at least 4 * replication_timeout seconds pass?

Because it is safe to retry it, and it is normal due to split vote possibility.

> By the way, the raft paper doesn't have a separate leader disconnect timeout. The
> same election timeout is used for this purpose. So that's another argument for
> setting election_timeout to at least 4 * replication_timeout.

But I see your point. I started a discussion with other participants. It is
likely we will remove the election_timeout option and use the replication
death timeout instead.

Also, we will probably drop election_is_enabled and election_is_candidate and
replace them with a new option election_mode, which is a string: either 'off',
'candidate', or 'voter'. Another alternative is 'off' / 'on' / 'voter', or
'voter' -> 'only_vote'. I don't know yet. Anyway, it looks better than 2 flags,
I think.
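
Just to illustrate the proposal (hypothetical, nothing here exists yet; the
values are only the candidates listed above):

    -- today: two boolean flags
    box.cfg{election_is_enabled = true, election_is_candidate = true}
    -- proposed: a single string option
    box.cfg{election_mode = 'candidate'} -- or 'voter' / 'off'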

The patch LGTM. However, it seems you didn't push the update to the branch.


* Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
  2020-10-05 21:40     ` Vladislav Shpilevoy
@ 2020-10-06  7:30       ` Serge Petrenko
  0 siblings, 0 replies; 6+ messages in thread
From: Serge Petrenko @ 2020-10-06  7:30 UTC (permalink / raw)
  To: Vladislav Shpilevoy; +Cc: tarantool-patches


06.10.2020 00:40, Vladislav Shpilevoy wrote:
> Hi! Thanks for the fixes!
Thanks for the review!
>
>>>> + | ---
>>>> + | ...
>>>> diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua
>>>> index 36ea1f077..887d8a2a0 100644
>>>> --- a/test/replication/election_replica.lua
>>>> +++ b/test/replication/election_replica.lua
>>>> @@ -19,8 +20,11 @@ box.cfg({
>>>>        replication_timeout = 0.1,
>>>>        election_is_enabled = true,
>>>>        election_is_candidate = true,
>>>> -    election_timeout = 0.1,
>>>> -    replication_synchro_quorum = 3,
>>>> +    -- Should be at least as big as replication_disconnect_timeout, which is
>>>> +    -- 4 * replication_timeout.
>>>> +    election_timeout = 0.4,
>>> 2. Why? Election timeout has nothing to do with disconnect. It is about
>>> split vote. This also will slow down raft_basic.test.lua, which is not
>>> supposed to be long. For heartbeat timeouts Raft already uses
>>> replication_disconnect_timeout = replication_timeout * 4.
>> I've seen cases when a leader is elected, but doesn't send out the is_leader flag
>> in time, so new elections start over and over again. This only happened when the
>> tests were run in parallel, so the problem was probably in high load.
> It should not be a problem. 100ms is enough to eventually elect a leader when the
> instances run on the same machine. Several election attempts should not lead to
> a test fail. Because even 0.4 may lead to that. It is not a guaranteed protection.
>
>> So, my logic was that if we wait for 4 times replication timeout for the leader to
>> come back why not wait for 4 * replication timeout for the leader to establish
>> its leadership.
>>
>> I mean, if it's considered a normal situation when a leader disappears for not more
>> than 4 * replication_timeout, and this doesn't trigger an election, why should
>> elections end before at least 4 * replication_timeout seconds pass?
> Because it is safe to retry it, and it is normal due to split vote possibility.
>
>> By the way, the raft paper doesn't have a separate leader disconnect timeout. The
>> same election timeout is used for this purpose. So that's another argument for
>> setting election_timeout to at least 4 * replication_timeout.
> But I see your point. I started a discussion with other participants. It is
> likely we will remove election_timeout option and use replication death timeout
> instead.
This might be reasonable. It looks like detecting a split vote and ending an
election early isn't that hard, since the instances send out their votes to
every cluster member.
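
Roughly the idea, as an illustrative sketch only (not the actual raft code):
each instance sees every vote, so it can notice that no candidate can reach
the quorum anymore and restart the election without waiting for the timeout.

    -- votes: map candidate id -> number of votes observed so far.
    local function split_vote(votes, n_members, quorum)
        local casted, best = 0, 0
        for _, count in pairs(votes) do
            casted = casted + count
            if count > best then
                best = count
            end
        end
        -- Even if all remaining votes go to the current front-runner,
        -- nobody reaches the quorum: the vote is split, restart early.
        return best + (n_members - casted) < quorum
    end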
>
> Also we will probably drop election_is_enabled and election_is_candidate, and
> replace them with a new option election_mode, which is a string: either 'off',
> or 'candidate', or 'voter'. Another alternative - 'off' / 'on' / 'voter'.
> Or 'voter' -> 'only_vote'. Idk yet. Anyway it looks better than 2 flags, I think.
Yeah, sounds good.
>
> The patch LGTM. However it seems your didn't push the update on the branch.
Oh, my bad. Fixed now.

-- 
Serge Petrenko


* Re: [Tarantool-patches] [PATCH] raft: add a test with synchronous replication
  2020-10-02 10:33 [Tarantool-patches] [PATCH] raft: add a test with synchronous replication Serge Petrenko
  2020-10-04 13:54 ` Vladislav Shpilevoy
@ 2020-10-06 10:04 ` Kirill Yukhin
  1 sibling, 0 replies; 6+ messages in thread
From: Kirill Yukhin @ 2020-10-06 10:04 UTC (permalink / raw)
  To: Serge Petrenko; +Cc: tarantool-patches, v.shpilevoy

Hello,

On 02 Oct 13:33, Serge Petrenko wrote:
> ---
> Branch: https://github.com/tarantool/tarantool/tree/sp/raft-qsync-test
> 
> The test is relatively long (runs for 10 seconds on my machine).
> but I still think it's worth having it, at least under --long option (I  haven't
> added it to long_run list yet).
> 
>  test/replication/election_qsync.result   | 125 +++++++++++++++++++++++
>  test/replication/election_qsync.test.lua |  70 +++++++++++++
>  test/replication/election_replica.lua    |  10 +-
>  3 files changed, 202 insertions(+), 3 deletions(-)
>  create mode 100644 test/replication/election_qsync.result
>  create mode 100644 test/replication/election_qsync.test.lua

I've checked your patch into master.

--
Regards, Kirill Yukhin

