From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng1.m.smailru.net (smtpng1.m.smailru.net [94.100.181.251]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 9CCB244643B for ; Tue, 22 Sep 2020 01:48:02 +0300 (MSK) From: Vladislav Shpilevoy References: <5a7cf2f2-1b7c-28be-688f-d604a64d0623@tarantool.org> Message-ID: Date: Tue, 22 Sep 2020 00:48:01 +0200 MIME-Version: 1.0 In-Reply-To: <5a7cf2f2-1b7c-28be-688f-d604a64d0623@tarantool.org> Content-Type: text/plain; charset=utf-8 Content-Language: en-US Content-Transfer-Encoding: 7bit Subject: Re: [Tarantool-patches] [PATCH v2 12/11] raft: add tests List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org, gorcunov@gmail.com A new test in a separate commit on top of the branch. I also found that sometimes after bootstrap we have leader count != 1. But I didn't catch it again to see what it was equal to. I suspect it was 0. Probably bootstrap was too long and led to an election timeout at some point, so somebody bumped all the terms and made all nodes followers. We probably need to replace some of the checks with test_run:wait_cond() here. ==================== [tosquash] raft: add re-election test diff --git a/test/replication/raft_basic.result b/test/replication/raft_basic.result index 2996fe3eb..3421227fb 100644 --- a/test/replication/raft_basic.result +++ b/test/replication/raft_basic.result @@ -140,6 +140,9 @@ test_run:wait_fullmesh(SERVERS) is_leader_cmd = 'return box.info.raft.state == \'leader\'' | --- | ... +leader_id_cmd = 'return box.info.raft.leader' + | --- + | ... is_r1_leader = test_run:eval('raft_replica1', is_leader_cmd)[1] | --- | ... @@ -162,6 +165,114 @@ assert(leader_count == 1) | --- | - true | ... +-- All nodes have the same leader. 
+r1_leader = test_run:eval('raft_replica1', leader_id_cmd)[1] + | --- + | ... +r2_leader = test_run:eval('raft_replica2', leader_id_cmd)[1] + | --- + | ... +r3_leader = test_run:eval('raft_replica3', leader_id_cmd)[1] + | --- + | ... +assert(r1_leader ~= 0) + | --- + | - true + | ... +assert(r1_leader == r2_leader) + | --- + | - true + | ... +assert(r1_leader == r3_leader) + | --- + | - true + | ... + +-- +-- Leader death starts a new election. +-- +leader_name = nil + | --- + | ... +nonleader1_name = nil + | --- + | ... +nonleader2_name = nil + | --- + | ... +if is_r1_leader then \ + leader_name = 'raft_replica1' \ + nonleader1_name = 'raft_replica2' \ + nonleader2_name = 'raft_replica3' \ +elseif is_r2_leader then \ + leader_name = 'raft_replica2' \ + nonleader1_name = 'raft_replica1' \ + nonleader2_name = 'raft_replica3' \ +else \ + leader_name = 'raft_replica3' \ + nonleader1_name = 'raft_replica1' \ + nonleader2_name = 'raft_replica2' \ +end + | --- + | ... +-- Lower the quorum so the 2 alive nodes could elect a new leader when the third +-- node dies. +test_run:switch(nonleader1_name) + | --- + | - true + | ... +box.cfg{replication_synchro_quorum = 2} + | --- + | ... +-- Switch via default where the names are defined. +test_run:switch('default') + | --- + | - true + | ... +test_run:switch(nonleader2_name) + | --- + | - true + | ... +box.cfg{replication_synchro_quorum = 2} + | --- + | ... + +test_run:switch('default') + | --- + | - true + | ... +test_run:cmd(string.format('stop server %s', leader_name)) + | --- + | - true + | ... +test_run:wait_cond(function() \ + is_r1_leader = test_run:eval(nonleader1_name, is_leader_cmd)[1] \ + is_r2_leader = test_run:eval(nonleader2_name, is_leader_cmd)[1] \ + return is_r1_leader or is_r2_leader \ +end) + | --- + | - true + | ... +r1_leader = test_run:eval(nonleader1_name, leader_id_cmd)[1] + | --- + | ... +r2_leader = test_run:eval(nonleader2_name, leader_id_cmd)[1] + | --- + | ... 
+assert(r1_leader ~= 0) + | --- + | - true + | ... +assert(r1_leader == r2_leader) + | --- + | - true + | ... + +test_run:cmd(string.format('start server %s', leader_name)) + | --- + | - true + | ... + test_run:drop_cluster(SERVERS) | --- | ... diff --git a/test/replication/raft_basic.test.lua b/test/replication/raft_basic.test.lua index 7e7568991..b8e5a5eaf 100644 --- a/test/replication/raft_basic.test.lua +++ b/test/replication/raft_basic.test.lua @@ -57,6 +57,7 @@ SERVERS = {'raft_replica1', 'raft_replica2', 'raft_replica3'} test_run:create_cluster(SERVERS, "replication") test_run:wait_fullmesh(SERVERS) is_leader_cmd = 'return box.info.raft.state == \'leader\'' +leader_id_cmd = 'return box.info.raft.leader' is_r1_leader = test_run:eval('raft_replica1', is_leader_cmd)[1] is_r2_leader = test_run:eval('raft_replica2', is_leader_cmd)[1] is_r3_leader = test_run:eval('raft_replica3', is_leader_cmd)[1] @@ -64,4 +65,54 @@ leader_count = is_r1_leader and 1 or 0 leader_count = leader_count + (is_r2_leader and 1 or 0) leader_count = leader_count + (is_r3_leader and 1 or 0) assert(leader_count == 1) +-- All nodes have the same leader. +r1_leader = test_run:eval('raft_replica1', leader_id_cmd)[1] +r2_leader = test_run:eval('raft_replica2', leader_id_cmd)[1] +r3_leader = test_run:eval('raft_replica3', leader_id_cmd)[1] +assert(r1_leader ~= 0) +assert(r1_leader == r2_leader) +assert(r1_leader == r3_leader) + +-- +-- Leader death starts a new election. 
+-- +leader_name = nil +nonleader1_name = nil +nonleader2_name = nil +if is_r1_leader then \ + leader_name = 'raft_replica1' \ + nonleader1_name = 'raft_replica2' \ + nonleader2_name = 'raft_replica3' \ +elseif is_r2_leader then \ + leader_name = 'raft_replica2' \ + nonleader1_name = 'raft_replica1' \ + nonleader2_name = 'raft_replica3' \ +else \ + leader_name = 'raft_replica3' \ + nonleader1_name = 'raft_replica1' \ + nonleader2_name = 'raft_replica2' \ +end +-- Lower the quorum so the 2 alive nodes could elect a new leader when the third +-- node dies. +test_run:switch(nonleader1_name) +box.cfg{replication_synchro_quorum = 2} +-- Switch via default where the names are defined. +test_run:switch('default') +test_run:switch(nonleader2_name) +box.cfg{replication_synchro_quorum = 2} + +test_run:switch('default') +test_run:cmd(string.format('stop server %s', leader_name)) +test_run:wait_cond(function() \ + is_r1_leader = test_run:eval(nonleader1_name, is_leader_cmd)[1] \ + is_r2_leader = test_run:eval(nonleader2_name, is_leader_cmd)[1] \ + return is_r1_leader or is_r2_leader \ +end) +r1_leader = test_run:eval(nonleader1_name, leader_id_cmd)[1] +r2_leader = test_run:eval(nonleader2_name, leader_id_cmd)[1] +assert(r1_leader ~= 0) +assert(r1_leader == r2_leader) + +test_run:cmd(string.format('start server %s', leader_name)) + test_run:drop_cluster(SERVERS)