From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtp37.i.mail.ru (smtp37.i.mail.ru [94.100.177.97]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id 1344243040C for ; Fri, 4 Sep 2020 12:07:52 +0300 (MSK) References: From: Serge Petrenko Message-ID: <5811bf2d-a81e-721c-5da1-719a3dc7b5e9@tarantool.org> Date: Fri, 4 Sep 2020 12:07:51 +0300 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset="utf-8"; format="flowed" Content-Transfer-Encoding: 8bit Content-Language: en-GB Subject: Re: [Tarantool-patches] [PATCH v2 06/10] raft: introduce box.cfg.raft_* options List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Vladislav Shpilevoy , tarantool-patches@dev.tarantool.org, gorcunov@gmail.com 04.09.2020 01:51, Vladislav Shpilevoy пишет: > The new options are: > > - raft_is_enabled - enable/disable Raft. When disabled, the node > is supposed to work like if Raft does not exist. Like earlier; > > - raft_is_candidate - a flag whether the instance can try to > become a leader. Note, it can vote for other nodes regardless of > value of this option; > > - raft_election_timeout - how long need to wait until election > end, in seconds. > > The options don't do anything now. They are added separately in > order to keep such mundane changes from the main Raft commit, to > simplify its review. > > Part of #1146 Thanks for the patch! Please see my comment below. > --- > src/box/box.cc | 91 +++++++++++++++++++++++++++++++++ > src/box/box.h | 3 ++ > src/box/lua/cfg.cc | 27 ++++++++++ > src/box/lua/load_cfg.lua | 15 ++++++ > src/box/raft.c | 30 +++++++++++ > src/box/raft.h | 35 +++++++++++++ > test/app-tap/init_script.result | 3 ++ > test/box/admin.result | 6 +++ > test/box/cfg.result | 12 +++++ > 9 files changed, 222 insertions(+) > > diff --git a/src/box/box.cc b/src/box/box.cc > index 281917af2..5f04a1a78 100644 > --- a/src/box/box.cc > +++ b/src/box/box.cc > @@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name) > } > } > > +static int > +box_check_raft_is_enabled(void) > +{ > + int b = cfg_getb("raft_is_enabled"); > + if (b < 0) { > + diag_set(ClientError, ER_CFG, "raft_is_enabled", > + "the value must be a boolean"); > + } > + return b; > +} > + > +static int > +box_check_raft_is_candidate(void) > +{ > + int b = cfg_getb("raft_is_candidate"); > + if (b < 0) { > + diag_set(ClientError, ER_CFG, "raft_is_candidate", > + "the value must be a boolean"); > + } > + return b; > +} > + > +static double > +box_check_raft_election_timeout(void) > +{ > + double d = cfg_getd("raft_election_timeout"); > + if (d == 0) { Should be "d <= 0" here? Otherwise you end up with a diag_raise without appropriate diag_set when raft_election_timeout is negative. > + diag_set(ClientError, ER_CFG, "raft_election_timeout", > + "the value must be a positive number"); > + return -1; > + } > + return d; > +} > + > static void > box_check_replication(void) > { > @@ -729,6 +763,12 @@ box_check_config(void) > box_check_uri(cfg_gets("listen"), "listen"); > box_check_instance_uuid(&uuid); > box_check_replicaset_uuid(&uuid); > + if (box_check_raft_is_enabled() < 0) > + diag_raise(); > + if (box_check_raft_is_candidate() < 0) > + diag_raise(); > + if (box_check_raft_election_timeout() < 0) > + diag_raise(); > box_check_replication(); > box_check_replication_timeout(); > box_check_replication_connect_timeout(); > @@ -751,6 +791,36 @@ box_check_config(void) > diag_raise(); > } > > +int > +box_set_raft_is_enabled(void) > +{ > + int b = box_check_raft_is_enabled(); > + if (b < 0) > + return -1; > + raft_cfg_is_enabled(b); > + return 0; > +} > + > +int > +box_set_raft_is_candidate(void) > +{ > + int b = box_check_raft_is_candidate(); > + if (b < 0) > + return -1; > + raft_cfg_is_candidate(b); > + return 0; > +} > + > +int > +box_set_raft_election_timeout(void) > +{ > + double d = box_check_raft_election_timeout(); > + if (d < 0) > + return -1; > + raft_cfg_election_timeout(d); > + return 0; > +} > + > /* > * Parse box.cfg.replication and create appliers. > */ > @@ -835,6 +905,7 @@ void > box_set_replication_timeout(void) > { > replication_timeout = box_check_replication_timeout(); > + raft_cfg_death_timeout(); > } > > void > @@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void) > return -1; > replication_synchro_quorum = value; > txn_limbo_on_parameters_change(&txn_limbo); > + raft_cfg_election_quorum(); > return 0; > } > > @@ -2680,6 +2752,25 @@ box_cfg_xc(void) > > fiber_gc(); > is_box_configured = true; > + /* > + * Fill in Raft parameters after bootstrap. Before it is not possible - > + * there may be Raft data to recover from WAL and snapshot. Also until > + * recovery is done, it is not possible to write new records into WAL. > + * It is also totally safe, because relaying is not started until the > + * box is configured. So it can't happen, that this Raft node will try > + * to relay to another Raft node without Raft enabled leading to > + * disconnect. > + */ > + if (box_set_raft_is_candidate() != 0) > + diag_raise(); > + if (box_set_raft_election_timeout() != 0) > + diag_raise(); > + /* > + * Raft is enabled last. So as all the parameters are installed by that > + * time. > + */ > + if (box_set_raft_is_enabled() != 0) > + diag_raise(); > > title("running"); > say_info("ready to accept requests"); > diff --git a/src/box/box.h b/src/box/box.h > index 5988264a5..637d10dd3 100644 > --- a/src/box/box.h > +++ b/src/box/box.h > @@ -245,6 +245,9 @@ void box_set_vinyl_memory(void); > void box_set_vinyl_max_tuple_size(void); > void box_set_vinyl_cache(void); > void box_set_vinyl_timeout(void); > +int box_set_raft_is_enabled(void); > +int box_set_raft_is_candidate(void); > +int box_set_raft_election_timeout(void); > void box_set_replication_timeout(void); > void box_set_replication_connect_timeout(void); > void box_set_replication_connect_quorum(void); > diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc > index d481155cd..339b85f9d 100644 > --- a/src/box/lua/cfg.cc > +++ b/src/box/lua/cfg.cc > @@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L) > return 0; > } > > +static int > +lbox_cfg_set_raft_is_enabled(struct lua_State *L) > +{ > + if (box_set_raft_is_enabled() != 0) > + luaT_error(L); > + return 0; > +} > + > +static int > +lbox_cfg_set_raft_is_candidate(struct lua_State *L) > +{ > + if (box_set_raft_is_candidate() != 0) > + luaT_error(L); > + return 0; > +} > + > +static int > +lbox_cfg_set_raft_election_timeout(struct lua_State *L) > +{ > + if (box_set_raft_election_timeout() != 0) > + luaT_error(L); > + return 0; > +} > + > static int > lbox_cfg_set_replication_timeout(struct lua_State *L) > { > @@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L) > {"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size}, > {"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache}, > {"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout}, > + {"cfg_set_raft_is_enabled", lbox_cfg_set_raft_is_enabled}, > + {"cfg_set_raft_is_candidate", lbox_cfg_set_raft_is_candidate}, > + {"cfg_set_raft_election_timeout", lbox_cfg_set_raft_election_timeout}, > {"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout}, > {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, > {"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, > diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua > index 53f572895..2c98fd837 100644 > --- a/src/box/lua/load_cfg.lua > +++ b/src/box/lua/load_cfg.lua > @@ -86,6 +86,9 @@ local default_cfg = { > checkpoint_wal_threshold = 1e18, > checkpoint_count = 2, > worker_pool_threads = 4, > + raft_is_enabled = false, > + raft_is_candidate = true, > + raft_election_timeout = 5, > replication_timeout = 1, > replication_sync_lag = 10, > replication_sync_timeout = 300, > @@ -163,6 +166,9 @@ local template_cfg = { > read_only = 'boolean', > hot_standby = 'boolean', > worker_pool_threads = 'number', > + raft_is_enabled = 'boolean', > + raft_is_candidate = 'boolean', > + raft_election_timeout = 'number', > replication_timeout = 'number', > replication_sync_lag = 'number', > replication_sync_timeout = 'number', > @@ -279,6 +285,9 @@ local dynamic_cfg = { > require('title').update(box.cfg.custom_proc_title) > end, > force_recovery = function() end, > + raft_is_enabled = private.cfg_set_raft_is_enabled, > + raft_is_candidate = private.cfg_set_raft_is_candidate, > + raft_election_timeout = private.cfg_set_raft_election_timeout, > replication_timeout = private.cfg_set_replication_timeout, > replication_connect_timeout = private.cfg_set_replication_connect_timeout, > replication_connect_quorum = private.cfg_set_replication_connect_quorum, > @@ -333,6 +342,9 @@ local dynamic_cfg_order = { > -- the new one. This should be fixed when box.cfg is able to > -- apply some parameters together and atomically. > replication_anon = 250, > + raft_is_enabled = 300, > + raft_is_candidate = 310, > + raft_election_timeout = 320, > } > > local function sort_cfg_cb(l, r) > @@ -350,6 +362,9 @@ local dynamic_cfg_skip_at_load = { > vinyl_cache = true, > vinyl_timeout = true, > too_long_threshold = true, > + raft_is_enabled = true, > + raft_is_candidate = true, > + raft_election_timeout = true, > replication = true, > replication_timeout = true, > replication_connect_timeout = true, > diff --git a/src/box/raft.c b/src/box/raft.c > index 511fe42f5..ee54d02b7 100644 > --- a/src/box/raft.c > +++ b/src/box/raft.c > @@ -37,6 +37,8 @@ > > /** Raft state of this instance. */ > struct raft raft = { > + .is_enabled = false, > + .is_candidate = false, > .term = 1, > .vote = 0, > }; > @@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req) > req->term = raft.term; > req->vote = raft.vote; > } > + > +void > +raft_cfg_is_enabled(bool is_enabled) > +{ > + raft.is_enabled = is_enabled; > +} > + > +void > +raft_cfg_is_candidate(bool is_candidate) > +{ > + raft.is_candidate = is_candidate; > +} > + > +void > +raft_cfg_election_timeout(double timeout) > +{ > + raft.election_timeout = timeout; > +} > + > +void > +raft_cfg_election_quorum(void) > +{ > +} > + > +void > +raft_cfg_death_timeout(void) > +{ > +} > diff --git a/src/box/raft.h b/src/box/raft.h > index 31f7becdb..f27222752 100644 > --- a/src/box/raft.h > +++ b/src/box/raft.h > @@ -30,6 +30,7 @@ > * SUCH DAMAGE. > */ > #include > +#include > > #if defined(__cplusplus) > extern "C" { > @@ -38,8 +39,11 @@ extern "C" { > struct raft_request; > > struct raft { > + bool is_enabled; > + bool is_candidate; > uint64_t term; > uint32_t vote; > + double election_timeout; > }; > > extern struct raft raft; > @@ -48,6 +52,37 @@ extern struct raft raft; > void > raft_process_recovery(const struct raft_request *req); > > +/** Configure whether Raft is enabled. */ > +void > +raft_cfg_is_enabled(bool is_enabled); > + > +/** > + * Configure whether the instance can be elected as Raft leader. Even if false, > + * the node still can vote, when Raft is enabled. > + */ > +void > +raft_cfg_is_candidate(bool is_candidate); > + > +/** Configure Raft leader election timeout. */ > +void > +raft_cfg_election_timeout(double timeout); > + > +/** > + * Configure Raft leader election quorum. There is no a separate option. > + * Instead, synchronous replication quorum is used. Since Raft is tightly bound > + * with synchronous replication. > + */ > +void > +raft_cfg_election_quorum(void); > + > +/** > + * Configure Raft leader death timeout. I.e. number of seconds without > + * heartbeats from the leader to consider it dead. There is no a separate > + * option. Raft uses replication timeout for that. > + */ > +void > +raft_cfg_death_timeout(void); > + > /** > * Save complete Raft state into a request to be sent to other instances of the > * cluster. It is allowed to save anything here, not only persistent state. > diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result > index 857f0c95f..1d191987a 100644 > --- a/test/app-tap/init_script.result > +++ b/test/app-tap/init_script.result > @@ -23,6 +23,9 @@ memtx_memory:107374182 > memtx_min_tuple_size:16 > net_msg_max:768 > pid_file:box.pid > +raft_election_timeout:5 > +raft_is_candidate:true > +raft_is_enabled:false > read_only:false > readahead:16320 > replication_anon:false > diff --git a/test/box/admin.result b/test/box/admin.result > index ab3e80a97..13536a318 100644 > --- a/test/box/admin.result > +++ b/test/box/admin.result > @@ -67,6 +67,12 @@ cfg_filter(box.cfg) > - 768 > - - pid_file > - > + - - raft_election_timeout > + - 5 > + - - raft_is_candidate > + - true > + - - raft_is_enabled > + - false > - - read_only > - false > - - readahead > diff --git a/test/box/cfg.result b/test/box/cfg.result > index bdd210b09..11358b2cd 100644 > --- a/test/box/cfg.result > +++ b/test/box/cfg.result > @@ -55,6 +55,12 @@ cfg_filter(box.cfg) > | - 768 > | - - pid_file > | - > + | - - raft_election_timeout > + | - 5 > + | - - raft_is_candidate > + | - true > + | - - raft_is_enabled > + | - false > | - - read_only > | - false > | - - readahead > @@ -162,6 +168,12 @@ cfg_filter(box.cfg) > | - 768 > | - - pid_file > | - > + | - - raft_election_timeout > + | - 5 > + | - - raft_is_candidate > + | - true > + | - - raft_is_enabled > + | - false > | - - read_only > | - false > | - - readahead -- Serge Petrenko