From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from smtpng3.m.smailru.net (smtpng3.m.smailru.net [94.100.177.149]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dev.tarantool.org (Postfix) with ESMTPS id B25FF44643A for ; Wed, 30 Sep 2020 01:11:50 +0300 (MSK) From: Vladislav Shpilevoy Date: Wed, 30 Sep 2020 00:11:28 +0200 Message-Id: <1d329f0bbbf98dff878588b2ad754e2dd18404a7.1601417273.git.v.shpilevoy@tarantool.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: [Tarantool-patches] [PATCH v3 06/10] raft: introduce box.cfg.election_* options List-Id: Tarantool development patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org The new options are: - election_is_enabled - enable/disable leader election (via Raft). When disabled, the node is supposed to work as if Raft does not exist, as it did before; - election_is_candidate - a flag telling whether the instance can try to become a leader. Note that it can vote for other nodes regardless of the value of this option; - election_timeout - how long to wait until an election ends, in seconds. The options don't do anything now. They are added separately in order to keep such mundane changes out of the main Raft commit, to simplify its review. Option names don't mention 'Raft' on purpose, because - Not all users know what Raft is, so they may not even know it is related to leader election; - In the future the algorithm may change from Raft to something else, so it is better not to depend on it too much in the public API. 
Part of #1146 --- src/box/box.cc | 92 +++++++++++++++++++++++++++++++++ src/box/box.h | 3 ++ src/box/lua/cfg.cc | 27 ++++++++++ src/box/lua/load_cfg.lua | 15 ++++++ src/box/raft.c | 30 +++++++++++ src/box/raft.h | 35 +++++++++++++ test/app-tap/init_script.result | 3 ++ test/box/admin.result | 6 +++ test/box/cfg.result | 12 +++++ 9 files changed, 223 insertions(+) diff --git a/src/box/box.cc b/src/box/box.cc index 48fed9b2c..99a15bfd0 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name) } } +static int +box_check_election_is_enabled(void) +{ + int b = cfg_getb("election_is_enabled"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_enabled", + "the value must be a boolean"); + } + return b; +} + +static int +box_check_election_is_candidate(void) +{ + int b = cfg_getb("election_is_candidate"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_candidate", + "the value must be a boolean"); + } + return b; +} + +static double +box_check_election_timeout(void) +{ + double d = cfg_getd("election_timeout"); + if (d <= 0) { + diag_set(ClientError, ER_CFG, "election_timeout", + "the value must be a positive number"); + return -1; + } + return d; +} + static void box_check_replication(void) { @@ -729,6 +763,12 @@ box_check_config(void) box_check_uri(cfg_gets("listen"), "listen"); box_check_instance_uuid(&uuid); box_check_replicaset_uuid(&uuid); + if (box_check_election_is_enabled() < 0) + diag_raise(); + if (box_check_election_is_candidate() < 0) + diag_raise(); + if (box_check_election_timeout() < 0) + diag_raise(); box_check_replication(); box_check_replication_timeout(); box_check_replication_connect_timeout(); @@ -751,6 +791,36 @@ box_check_config(void) diag_raise(); } +int +box_set_election_is_enabled(void) +{ + int b = box_check_election_is_enabled(); + if (b < 0) + return -1; + raft_cfg_is_enabled(b); + return 0; +} + +int +box_set_election_is_candidate(void) +{ + int b = 
box_check_election_is_candidate(); + if (b < 0) + return -1; + raft_cfg_is_candidate(b); + return 0; +} + +int +box_set_election_timeout(void) +{ + double d = box_check_election_timeout(); + if (d < 0) + return -1; + raft_cfg_election_timeout(d); + return 0; +} + /* * Parse box.cfg.replication and create appliers. */ @@ -835,6 +905,7 @@ void box_set_replication_timeout(void) { replication_timeout = box_check_replication_timeout(); + raft_cfg_death_timeout(); } void @@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void) return -1; replication_synchro_quorum = value; txn_limbo_on_parameters_change(&txn_limbo); + raft_cfg_election_quorum(); return 0; } @@ -2686,6 +2758,26 @@ box_cfg_xc(void) fiber_gc(); is_box_configured = true; + /* + * Fill in leader election parameters after bootstrap. Before it is not + * possible - there may be relevant data to recover from WAL and + * snapshot. Also, until recovery is done, it is not possible to write + * new records into WAL. It is also totally safe, because relaying is + * not started until the box is configured. So it can't happen that + * this election-enabled node will try to relay to another + * election-enabled node without election actually enabled, leading to + * disconnect. + */ + if (box_set_election_is_candidate() != 0) + diag_raise(); + if (box_set_election_timeout() != 0) + diag_raise(); + /* + * Election is enabled last, so all the parameters are installed by + * that time. 
+ */ + if (box_set_election_is_enabled() != 0) + diag_raise(); title("running"); say_info("ready to accept requests"); diff --git a/src/box/box.h b/src/box/box.h index 5988264a5..45ff8bbbf 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -245,6 +245,9 @@ void box_set_vinyl_memory(void); void box_set_vinyl_max_tuple_size(void); void box_set_vinyl_cache(void); void box_set_vinyl_timeout(void); +int box_set_election_is_enabled(void); +int box_set_election_is_candidate(void); +int box_set_election_timeout(void); void box_set_replication_timeout(void); void box_set_replication_connect_timeout(void); void box_set_replication_connect_quorum(void); diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index d481155cd..bbb92f038 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L) return 0; } +static int +lbox_cfg_set_election_is_enabled(struct lua_State *L) +{ + if (box_set_election_is_enabled() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_is_candidate(struct lua_State *L) +{ + if (box_set_election_is_candidate() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_timeout(struct lua_State *L) +{ + if (box_set_election_timeout() != 0) + luaT_error(L); + return 0; +} + static int lbox_cfg_set_replication_timeout(struct lua_State *L) { @@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size}, {"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache}, {"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout}, + {"cfg_set_election_is_enabled", lbox_cfg_set_election_is_enabled}, + {"cfg_set_election_is_candidate", lbox_cfg_set_election_is_candidate}, + {"cfg_set_election_timeout", lbox_cfg_set_election_timeout}, {"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout}, {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, 
{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 92347a9fd..d558e7ac9 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -87,6 +87,9 @@ local default_cfg = { checkpoint_wal_threshold = 1e18, checkpoint_count = 2, worker_pool_threads = 4, + election_is_enabled = false, + election_is_candidate = true, + election_timeout = 5, replication_timeout = 1, replication_sync_lag = 10, replication_sync_timeout = 300, @@ -165,6 +168,9 @@ local template_cfg = { hot_standby = 'boolean', memtx_use_mvcc_engine = 'boolean', worker_pool_threads = 'number', + election_is_enabled = 'boolean', + election_is_candidate = 'boolean', + election_timeout = 'number', replication_timeout = 'number', replication_sync_lag = 'number', replication_sync_timeout = 'number', @@ -281,6 +287,9 @@ local dynamic_cfg = { require('title').update(box.cfg.custom_proc_title) end, force_recovery = function() end, + election_is_enabled = private.cfg_set_election_is_enabled, + election_is_candidate = private.cfg_set_election_is_candidate, + election_timeout = private.cfg_set_election_timeout, replication_timeout = private.cfg_set_replication_timeout, replication_connect_timeout = private.cfg_set_replication_connect_timeout, replication_connect_quorum = private.cfg_set_replication_connect_quorum, @@ -335,6 +344,9 @@ local dynamic_cfg_order = { -- the new one. This should be fixed when box.cfg is able to -- apply some parameters together and atomically. 
replication_anon = 250, + election_is_enabled = 300, + election_is_candidate = 310, + election_timeout = 320, } local function sort_cfg_cb(l, r) @@ -352,6 +364,9 @@ local dynamic_cfg_skip_at_load = { vinyl_cache = true, vinyl_timeout = true, too_long_threshold = true, + election_is_enabled = true, + election_is_candidate = true, + election_timeout = true, replication = true, replication_timeout = true, replication_connect_timeout = true, diff --git a/src/box/raft.c b/src/box/raft.c index 511fe42f5..ee54d02b7 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -37,6 +37,8 @@ /** Raft state of this instance. */ struct raft raft = { + .is_enabled = false, + .is_candidate = false, .term = 1, .vote = 0, }; @@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req) req->term = raft.term; req->vote = raft.vote; } + +void +raft_cfg_is_enabled(bool is_enabled) +{ + raft.is_enabled = is_enabled; +} + +void +raft_cfg_is_candidate(bool is_candidate) +{ + raft.is_candidate = is_candidate; +} + +void +raft_cfg_election_timeout(double timeout) +{ + raft.election_timeout = timeout; +} + +void +raft_cfg_election_quorum(void) +{ +} + +void +raft_cfg_death_timeout(void) +{ +} diff --git a/src/box/raft.h b/src/box/raft.h index 31f7becdb..f27222752 100644 --- a/src/box/raft.h +++ b/src/box/raft.h @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ #include +#include #if defined(__cplusplus) extern "C" { @@ -38,8 +39,11 @@ extern "C" { struct raft_request; struct raft { + bool is_enabled; + bool is_candidate; uint64_t term; uint32_t vote; + double election_timeout; }; extern struct raft raft; @@ -48,6 +52,37 @@ extern struct raft raft; void raft_process_recovery(const struct raft_request *req); +/** Configure whether Raft is enabled. */ +void +raft_cfg_is_enabled(bool is_enabled); + +/** + * Configure whether the instance can be elected as Raft leader. Even if false, + * the node still can vote, when Raft is enabled. 
+ */ +void +raft_cfg_is_candidate(bool is_candidate); + +/** Configure Raft leader election timeout. */ +void +raft_cfg_election_timeout(double timeout); + +/** + * Configure Raft leader election quorum. There is no separate option. + * Instead, synchronous replication quorum is used, since Raft is tightly bound + * with synchronous replication. + */ +void +raft_cfg_election_quorum(void); + +/** + * Configure Raft leader death timeout. I.e. number of seconds without + * heartbeats from the leader to consider it dead. There is no separate + * option. Raft uses replication timeout for that. + */ +void +raft_cfg_death_timeout(void); + /** * Save complete Raft state into a request to be sent to other instances of the * cluster. It is allowed to save anything here, not only persistent state. diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result index c8974d708..d8969278b 100644 --- a/test/app-tap/init_script.result +++ b/test/app-tap/init_script.result @@ -8,6 +8,9 @@ checkpoint_count:2 checkpoint_interval:3600 checkpoint_wal_threshold:1e+18 coredump:false +election_is_candidate:true +election_is_enabled:false +election_timeout:5 feedback_enabled:true feedback_host:https://feedback.tarantool.io feedback_interval:3600 diff --git a/test/box/admin.result b/test/box/admin.result index d1540a71e..52b62356f 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -37,6 +37,12 @@ cfg_filter(box.cfg) - 1000000000000000000 - - coredump - false + - - election_is_candidate + - true + - - election_is_enabled + - false + - - election_timeout + - 5 - - feedback_enabled - true - - feedback_host diff --git a/test/box/cfg.result b/test/box/cfg.result index fcfc64b22..f19f4bff7 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -25,6 +25,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - 
feedback_enabled | - true | - - feedback_host @@ -134,6 +140,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - feedback_enabled | - true | - - feedback_host -- 2.21.1 (Apple Git-122.3)