From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id CEC3B469719 for ; Wed, 30 Sep 2020 01:11:42 +0300 (MSK) From: Vladislav Shpilevoy Date: Wed, 30 Sep 2020 00:11:23 +0200 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v3 10/10] raft: add tests List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org Part of #1146 --- test/replication/election_basic.result | 278 +++++++++++++++++++++++ test/replication/election_basic.test.lua | 117 ++++++++++ test/replication/election_replica.lua | 30 +++ test/replication/election_replica1.lua | 1 + test/replication/election_replica2.lua | 1 + test/replication/election_replica3.lua | 1 + 6 files changed, 428 insertions(+) create mode 100644 test/replication/election_basic.result create mode 100644 test/replication/election_basic.test.lua create mode 100644 test/replication/election_replica.lua create mode 120000 test/replication/election_replica1.lua create mode 120000 test/replication/election_replica2.lua create mode 120000 test/replication/election_replica3.lua diff --git a/test/replication/election_basic.result b/test/replication/election_basic.result new file mode 100644 index 000000000..e59386f90 --- /dev/null +++ b/test/replication/election_basic.result @@ -0,0 +1,278 @@ +-- test-run result file version 2 +test_run = require('test_run').new() + | --- + | ... +-- +-- gh-1146: Raft protocol for automated leader election. +-- + +old_election_timeout = box.cfg.election_timeout + | --- + | ... + +-- Election is turned off by default. +assert(not box.cfg.election_is_enabled) + | --- + | - true + | ... 
+-- Is candidate by default. Although it does not matter, until election is +-- turned on. +assert(box.cfg.election_is_candidate) + | --- + | - true + | ... +-- Ensure election options are validated. +box.cfg{election_is_enabled = 100} + | --- + | - error: 'Incorrect value for option ''election_is_enabled'': should be of type boolean' + | ... +box.cfg{election_is_candidate = 100} + | --- + | - error: 'Incorrect value for option ''election_is_candidate'': should be of type + | boolean' + | ... +box.cfg{election_timeout = -1} + | --- + | - error: 'Incorrect value for option ''election_timeout'': the value must be a positive + | number' + | ... +box.cfg{election_timeout = 0} + | --- + | - error: 'Incorrect value for option ''election_timeout'': the value must be a positive + | number' + | ... + +-- When election is disabled, the instance is a follower. Does not try to become +-- a leader, and does not block write operations. +term = box.info.election.term + | --- + | ... +vote = box.info.election.vote + | --- + | ... +assert(box.info.election.state == 'follower') + | --- + | - true + | ... +assert(box.info.election.leader == 0) + | --- + | - true + | ... +assert(not box.info.ro) + | --- + | - true + | ... + +-- Turned on election blocks writes until the instance becomes a leader. +box.cfg{election_is_candidate = false} + | --- + | ... +box.cfg{election_is_enabled = true} + | --- + | ... +assert(box.info.election.state == 'follower') + | --- + | - true + | ... +assert(box.info.ro) + | --- + | - true + | ... +-- Term is not changed, because the instance can't be a candidate, +-- and therefore didn't try to vote nor to bump the term. +assert(box.info.election.term == term) + | --- + | - true + | ... +assert(box.info.election.vote == vote) + | --- + | - true + | ... +assert(box.info.election.leader == 0) + | --- + | - true + | ... + +-- Candidate instance votes immediately, if sees no leader. +box.cfg{election_timeout = 1000} + | --- + | ... 
+box.cfg{election_is_candidate = true} + | --- + | ... +test_run:wait_cond(function() return box.info.election.state == 'leader' end) + | --- + | - true + | ... +assert(box.info.election.term > term) + | --- + | - true + | ... +assert(box.info.election.vote == box.info.id) + | --- + | - true + | ... +assert(box.info.election.leader == box.info.id) + | --- + | - true + | ... + +box.cfg{ \ + election_is_enabled = false, \ + election_is_candidate = true, \ + election_timeout = old_election_timeout \ +} + | --- + | ... + +-- +-- See if bootstrap with election enabled works. +-- +SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'} + | --- + | ... +test_run:create_cluster(SERVERS, "replication") + | --- + | ... +test_run:wait_fullmesh(SERVERS) + | --- + | ... +is_leader_cmd = 'return box.info.election.state == \'leader\'' + | --- + | ... +leader_id_cmd = 'return box.info.election.leader' + | --- + | ... +is_r1_leader = test_run:eval('election_replica1', is_leader_cmd)[1] + | --- + | ... +is_r2_leader = test_run:eval('election_replica2', is_leader_cmd)[1] + | --- + | ... +is_r3_leader = test_run:eval('election_replica3', is_leader_cmd)[1] + | --- + | ... +leader_count = is_r1_leader and 1 or 0 + | --- + | ... +leader_count = leader_count + (is_r2_leader and 1 or 0) + | --- + | ... +leader_count = leader_count + (is_r3_leader and 1 or 0) + | --- + | ... +assert(leader_count == 1) + | --- + | - true + | ... +-- All nodes have the same leader. +r1_leader = test_run:eval('election_replica1', leader_id_cmd)[1] + | --- + | ... +r2_leader = test_run:eval('election_replica2', leader_id_cmd)[1] + | --- + | ... +r3_leader = test_run:eval('election_replica3', leader_id_cmd)[1] + | --- + | ... +assert(r1_leader ~= 0) + | --- + | - true + | ... +assert(r1_leader == r2_leader) + | --- + | - true + | ... +assert(r1_leader == r3_leader) + | --- + | - true + | ... + +-- +-- Leader death starts a new election. +-- +leader_name = nil + | --- + | ... 
+nonleader1_name = nil + | --- + | ... +nonleader2_name = nil + | --- + | ... +if is_r1_leader then \ + leader_name = 'election_replica1' \ + nonleader1_name = 'election_replica2' \ + nonleader2_name = 'election_replica3' \ +elseif is_r2_leader then \ + leader_name = 'election_replica2' \ + nonleader1_name = 'election_replica1' \ + nonleader2_name = 'election_replica3' \ +else \ + leader_name = 'election_replica3' \ + nonleader1_name = 'election_replica1' \ + nonleader2_name = 'election_replica2' \ +end + | --- + | ... +-- Lower the quorum so the 2 alive nodes could elect a new leader when the third +-- node dies. +test_run:switch(nonleader1_name) + | --- + | - true + | ... +box.cfg{replication_synchro_quorum = 2} + | --- + | ... +-- Switch via default where the names are defined. +test_run:switch('default') + | --- + | - true + | ... +test_run:switch(nonleader2_name) + | --- + | - true + | ... +box.cfg{replication_synchro_quorum = 2} + | --- + | ... + +test_run:switch('default') + | --- + | - true + | ... +test_run:cmd(string.format('stop server %s', leader_name)) + | --- + | - true + | ... +test_run:wait_cond(function() \ + is_r1_leader = test_run:eval(nonleader1_name, is_leader_cmd)[1] \ + is_r2_leader = test_run:eval(nonleader2_name, is_leader_cmd)[1] \ + return is_r1_leader or is_r2_leader \ +end) + | --- + | - true + | ... +r1_leader = test_run:eval(nonleader1_name, leader_id_cmd)[1] + | --- + | ... +r2_leader = test_run:eval(nonleader2_name, leader_id_cmd)[1] + | --- + | ... +assert(r1_leader ~= 0) + | --- + | - true + | ... +assert(r1_leader == r2_leader) + | --- + | - true + | ... + +test_run:cmd(string.format('start server %s', leader_name)) + | --- + | - true + | ... + +test_run:drop_cluster(SERVERS) + | --- + | ... 
diff --git a/test/replication/election_basic.test.lua b/test/replication/election_basic.test.lua new file mode 100644 index 000000000..506d5ec4e --- /dev/null +++ b/test/replication/election_basic.test.lua @@ -0,0 +1,117 @@ +test_run = require('test_run').new() +-- +-- gh-1146: Raft protocol for automated leader election. +-- + +old_election_timeout = box.cfg.election_timeout + +-- Election is turned off by default. +assert(not box.cfg.election_is_enabled) +-- Is candidate by default. Although it does not matter, until election is +-- turned on. +assert(box.cfg.election_is_candidate) +-- Ensure election options are validated. +box.cfg{election_is_enabled = 100} +box.cfg{election_is_candidate = 100} +box.cfg{election_timeout = -1} +box.cfg{election_timeout = 0} + +-- When election is disabled, the instance is a follower. Does not try to become +-- a leader, and does not block write operations. +term = box.info.election.term +vote = box.info.election.vote +assert(box.info.election.state == 'follower') +assert(box.info.election.leader == 0) +assert(not box.info.ro) + +-- Turned on election blocks writes until the instance becomes a leader. +box.cfg{election_is_candidate = false} +box.cfg{election_is_enabled = true} +assert(box.info.election.state == 'follower') +assert(box.info.ro) +-- Term is not changed, because the instance can't be a candidate, +-- and therefore didn't try to vote nor to bump the term. +assert(box.info.election.term == term) +assert(box.info.election.vote == vote) +assert(box.info.election.leader == 0) + +-- Candidate instance votes immediately, if sees no leader. 
+box.cfg{election_timeout = 1000} +box.cfg{election_is_candidate = true} +test_run:wait_cond(function() return box.info.election.state == 'leader' end) +assert(box.info.election.term > term) +assert(box.info.election.vote == box.info.id) +assert(box.info.election.leader == box.info.id) + +box.cfg{ \ + election_is_enabled = false, \ + election_is_candidate = true, \ + election_timeout = old_election_timeout \ +} + +-- +-- See if bootstrap with election enabled works. +-- +SERVERS = {'election_replica1', 'election_replica2', 'election_replica3'} +test_run:create_cluster(SERVERS, "replication") +test_run:wait_fullmesh(SERVERS) +is_leader_cmd = 'return box.info.election.state == \'leader\'' +leader_id_cmd = 'return box.info.election.leader' +is_r1_leader = test_run:eval('election_replica1', is_leader_cmd)[1] +is_r2_leader = test_run:eval('election_replica2', is_leader_cmd)[1] +is_r3_leader = test_run:eval('election_replica3', is_leader_cmd)[1] +leader_count = is_r1_leader and 1 or 0 +leader_count = leader_count + (is_r2_leader and 1 or 0) +leader_count = leader_count + (is_r3_leader and 1 or 0) +assert(leader_count == 1) +-- All nodes have the same leader. +r1_leader = test_run:eval('election_replica1', leader_id_cmd)[1] +r2_leader = test_run:eval('election_replica2', leader_id_cmd)[1] +r3_leader = test_run:eval('election_replica3', leader_id_cmd)[1] +assert(r1_leader ~= 0) +assert(r1_leader == r2_leader) +assert(r1_leader == r3_leader) + +-- +-- Leader death starts a new election. 
+-- +leader_name = nil +nonleader1_name = nil +nonleader2_name = nil +if is_r1_leader then \ + leader_name = 'election_replica1' \ + nonleader1_name = 'election_replica2' \ + nonleader2_name = 'election_replica3' \ +elseif is_r2_leader then \ + leader_name = 'election_replica2' \ + nonleader1_name = 'election_replica1' \ + nonleader2_name = 'election_replica3' \ +else \ + leader_name = 'election_replica3' \ + nonleader1_name = 'election_replica1' \ + nonleader2_name = 'election_replica2' \ +end +-- Lower the quorum so the 2 alive nodes could elect a new leader when the third +-- node dies. +test_run:switch(nonleader1_name) +box.cfg{replication_synchro_quorum = 2} +-- Switch via default where the names are defined. +test_run:switch('default') +test_run:switch(nonleader2_name) +box.cfg{replication_synchro_quorum = 2} + +test_run:switch('default') +test_run:cmd(string.format('stop server %s', leader_name)) +test_run:wait_cond(function() \ + is_r1_leader = test_run:eval(nonleader1_name, is_leader_cmd)[1] \ + is_r2_leader = test_run:eval(nonleader2_name, is_leader_cmd)[1] \ + return is_r1_leader or is_r2_leader \ +end) +r1_leader = test_run:eval(nonleader1_name, leader_id_cmd)[1] +r2_leader = test_run:eval(nonleader2_name, leader_id_cmd)[1] +assert(r1_leader ~= 0) +assert(r1_leader == r2_leader) + +test_run:cmd(string.format('start server %s', leader_name)) + +test_run:drop_cluster(SERVERS) diff --git a/test/replication/election_replica.lua b/test/replication/election_replica.lua new file mode 100644 index 000000000..36ea1f077 --- /dev/null +++ b/test/replication/election_replica.lua @@ -0,0 +1,30 @@ +#!/usr/bin/env tarantool + +local INSTANCE_ID = string.match(arg[0], "%d") +local SOCKET_DIR = require('fio').cwd() + +local function instance_uri(instance_id) + return SOCKET_DIR..'/autobootstrap'..instance_id..'.sock'; +end + +require('console').listen(os.getenv('ADMIN')) + +box.cfg({ + listen = instance_uri(INSTANCE_ID), + replication = { + instance_uri(1), 
+ instance_uri(2), + instance_uri(3), + }, + replication_timeout = 0.1, + election_is_enabled = true, + election_is_candidate = true, + election_timeout = 0.1, + replication_synchro_quorum = 3, + -- To reveal more election logs. + log_level = 6, +}) + +box.once("bootstrap", function() + box.schema.user.grant('guest', 'super') +end) diff --git a/test/replication/election_replica1.lua b/test/replication/election_replica1.lua new file mode 120000 index 000000000..61ba93fc8 --- /dev/null +++ b/test/replication/election_replica1.lua @@ -0,0 +1 @@ +election_replica.lua \ No newline at end of file diff --git a/test/replication/election_replica2.lua b/test/replication/election_replica2.lua new file mode 120000 index 000000000..61ba93fc8 --- /dev/null +++ b/test/replication/election_replica2.lua @@ -0,0 +1 @@ +election_replica.lua \ No newline at end of file diff --git a/test/replication/election_replica3.lua b/test/replication/election_replica3.lua new file mode 120000 index 000000000..61ba93fc8 --- /dev/null +++ b/test/replication/election_replica3.lua @@ -0,0 +1 @@ +election_replica.lua \ No newline at end of file -- 2.21.1 (Apple Git-122.3)