From: Vladislav Shpilevoy <v.shpilevoy@tarantool.org> To: tarantool-patches@dev.tarantool.org, sergepetrenko@tarantool.org Subject: [Tarantool-patches] [PATCH v3 06/10] raft: introduce box.cfg.election_* options Date: Wed, 30 Sep 2020 00:11:28 +0200 [thread overview] Message-ID: <1d329f0bbbf98dff878588b2ad754e2dd18404a7.1601417273.git.v.shpilevoy@tarantool.org> (raw) In-Reply-To: <cover.1601417273.git.v.shpilevoy@tarantool.org> The new options are: - election_is_enabled - enable/disable leader election (via Raft). When disabled, the node is supposed to work as if Raft did not exist, i.e. the same way it worked before; - election_is_candidate - a flag telling whether the instance can try to become a leader. Note that it can vote for other nodes regardless of the value of this option; - election_timeout - how long to wait until the election ends, in seconds. The options don't do anything yet. They are added separately in order to keep such mundane changes out of the main Raft commit, to simplify its review. Option names don't mention 'Raft' on purpose, because - Not all users know what Raft is, so they may not even know it is related to leader election; - In the future the algorithm may change from Raft to something else, so it is better not to depend on it too much in the public API. 
Part of #1146 --- src/box/box.cc | 92 +++++++++++++++++++++++++++++++++ src/box/box.h | 3 ++ src/box/lua/cfg.cc | 27 ++++++++++ src/box/lua/load_cfg.lua | 15 ++++++ src/box/raft.c | 30 +++++++++++ src/box/raft.h | 35 +++++++++++++ test/app-tap/init_script.result | 3 ++ test/box/admin.result | 6 +++ test/box/cfg.result | 12 +++++ 9 files changed, 223 insertions(+) diff --git a/src/box/box.cc b/src/box/box.cc index 48fed9b2c..99a15bfd0 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name) } } +static int +box_check_election_is_enabled(void) +{ + int b = cfg_getb("election_is_enabled"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_enabled", + "the value must be a boolean"); + } + return b; +} + +static int +box_check_election_is_candidate(void) +{ + int b = cfg_getb("election_is_candidate"); + if (b < 0) { + diag_set(ClientError, ER_CFG, "election_is_candidate", + "the value must be a boolean"); + } + return b; +} + +static double +box_check_election_timeout(void) +{ + double d = cfg_getd("election_timeout"); + if (d <= 0) { + diag_set(ClientError, ER_CFG, "election_timeout", + "the value must be a positive number"); + return -1; + } + return d; +} + static void box_check_replication(void) { @@ -729,6 +763,12 @@ box_check_config(void) box_check_uri(cfg_gets("listen"), "listen"); box_check_instance_uuid(&uuid); box_check_replicaset_uuid(&uuid); + if (box_check_election_is_enabled() < 0) + diag_raise(); + if (box_check_election_is_candidate() < 0) + diag_raise(); + if (box_check_election_timeout() < 0) + diag_raise(); box_check_replication(); box_check_replication_timeout(); box_check_replication_connect_timeout(); @@ -751,6 +791,36 @@ box_check_config(void) diag_raise(); } +int +box_set_election_is_enabled(void) +{ + int b = box_check_election_is_enabled(); + if (b < 0) + return -1; + raft_cfg_is_enabled(b); + return 0; +} + +int +box_set_election_is_candidate(void) +{ + int b = 
box_check_election_is_candidate(); + if (b < 0) + return -1; + raft_cfg_is_candidate(b); + return 0; +} + +int +box_set_election_timeout(void) +{ + double d = box_check_election_timeout(); + if (d < 0) + return -1; + raft_cfg_election_timeout(d); + return 0; +} + /* * Parse box.cfg.replication and create appliers. */ @@ -835,6 +905,7 @@ void box_set_replication_timeout(void) { replication_timeout = box_check_replication_timeout(); + raft_cfg_death_timeout(); } void @@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void) return -1; replication_synchro_quorum = value; txn_limbo_on_parameters_change(&txn_limbo); + raft_cfg_election_quorum(); return 0; } @@ -2686,6 +2758,26 @@ box_cfg_xc(void) fiber_gc(); is_box_configured = true; + /* + * Fill in leader election parameters after bootstrap. Before it is not + * possible - there may be relevant data to recover from WAL and + * snapshot. Also until recovery is done, it is not possible to write + * new records into WAL. It is also totally safe, because relaying is + * not started until the box is configured. So it can't happen, that + * this election-enabled node will try to relay to another + * election-enabled node without election actually enabled leading to + * disconnect. + */ + if (box_set_election_is_candidate() != 0) + diag_raise(); + if (box_set_election_timeout() != 0) + diag_raise(); + /* + * Election is enabled last. So as all the parameters are installed by + * that time. 
+ */ + if (box_set_election_is_enabled() != 0) + diag_raise(); title("running"); say_info("ready to accept requests"); diff --git a/src/box/box.h b/src/box/box.h index 5988264a5..45ff8bbbf 100644 --- a/src/box/box.h +++ b/src/box/box.h @@ -245,6 +245,9 @@ void box_set_vinyl_memory(void); void box_set_vinyl_max_tuple_size(void); void box_set_vinyl_cache(void); void box_set_vinyl_timeout(void); +int box_set_election_is_enabled(void); +int box_set_election_is_candidate(void); +int box_set_election_timeout(void); void box_set_replication_timeout(void); void box_set_replication_connect_timeout(void); void box_set_replication_connect_quorum(void); diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc index d481155cd..bbb92f038 100644 --- a/src/box/lua/cfg.cc +++ b/src/box/lua/cfg.cc @@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L) return 0; } +static int +lbox_cfg_set_election_is_enabled(struct lua_State *L) +{ + if (box_set_election_is_enabled() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_is_candidate(struct lua_State *L) +{ + if (box_set_election_is_candidate() != 0) + luaT_error(L); + return 0; +} + +static int +lbox_cfg_set_election_timeout(struct lua_State *L) +{ + if (box_set_election_timeout() != 0) + luaT_error(L); + return 0; +} + static int lbox_cfg_set_replication_timeout(struct lua_State *L) { @@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L) {"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size}, {"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache}, {"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout}, + {"cfg_set_election_is_enabled", lbox_cfg_set_election_is_enabled}, + {"cfg_set_election_is_candidate", lbox_cfg_set_election_is_candidate}, + {"cfg_set_election_timeout", lbox_cfg_set_election_timeout}, {"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout}, {"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum}, 
{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout}, diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua index 92347a9fd..d558e7ac9 100644 --- a/src/box/lua/load_cfg.lua +++ b/src/box/lua/load_cfg.lua @@ -87,6 +87,9 @@ local default_cfg = { checkpoint_wal_threshold = 1e18, checkpoint_count = 2, worker_pool_threads = 4, + election_is_enabled = false, + election_is_candidate = true, + election_timeout = 5, replication_timeout = 1, replication_sync_lag = 10, replication_sync_timeout = 300, @@ -165,6 +168,9 @@ local template_cfg = { hot_standby = 'boolean', memtx_use_mvcc_engine = 'boolean', worker_pool_threads = 'number', + election_is_enabled = 'boolean', + election_is_candidate = 'boolean', + election_timeout = 'number', replication_timeout = 'number', replication_sync_lag = 'number', replication_sync_timeout = 'number', @@ -281,6 +287,9 @@ local dynamic_cfg = { require('title').update(box.cfg.custom_proc_title) end, force_recovery = function() end, + election_is_enabled = private.cfg_set_election_is_enabled, + election_is_candidate = private.cfg_set_election_is_candidate, + election_timeout = private.cfg_set_election_timeout, replication_timeout = private.cfg_set_replication_timeout, replication_connect_timeout = private.cfg_set_replication_connect_timeout, replication_connect_quorum = private.cfg_set_replication_connect_quorum, @@ -335,6 +344,9 @@ local dynamic_cfg_order = { -- the new one. This should be fixed when box.cfg is able to -- apply some parameters together and atomically. 
replication_anon = 250, + election_is_enabled = 300, + election_is_candidate = 310, + election_timeout = 320, } local function sort_cfg_cb(l, r) @@ -352,6 +364,9 @@ local dynamic_cfg_skip_at_load = { vinyl_cache = true, vinyl_timeout = true, too_long_threshold = true, + election_is_enabled = true, + election_is_candidate = true, + election_timeout = true, replication = true, replication_timeout = true, replication_connect_timeout = true, diff --git a/src/box/raft.c b/src/box/raft.c index 511fe42f5..ee54d02b7 100644 --- a/src/box/raft.c +++ b/src/box/raft.c @@ -37,6 +37,8 @@ /** Raft state of this instance. */ struct raft raft = { + .is_enabled = false, + .is_candidate = false, .term = 1, .vote = 0, }; @@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req) req->term = raft.term; req->vote = raft.vote; } + +void +raft_cfg_is_enabled(bool is_enabled) +{ + raft.is_enabled = is_enabled; +} + +void +raft_cfg_is_candidate(bool is_candidate) +{ + raft.is_candidate = is_candidate; +} + +void +raft_cfg_election_timeout(double timeout) +{ + raft.election_timeout = timeout; +} + +void +raft_cfg_election_quorum(void) +{ +} + +void +raft_cfg_death_timeout(void) +{ +} diff --git a/src/box/raft.h b/src/box/raft.h index 31f7becdb..f27222752 100644 --- a/src/box/raft.h +++ b/src/box/raft.h @@ -30,6 +30,7 @@ * SUCH DAMAGE. */ #include <stdint.h> +#include <stdbool.h> #if defined(__cplusplus) extern "C" { @@ -38,8 +39,11 @@ extern "C" { struct raft_request; struct raft { + bool is_enabled; + bool is_candidate; uint64_t term; uint32_t vote; + double election_timeout; }; extern struct raft raft; @@ -48,6 +52,37 @@ extern struct raft raft; void raft_process_recovery(const struct raft_request *req); +/** Configure whether Raft is enabled. */ +void +raft_cfg_is_enabled(bool is_enabled); + +/** + * Configure whether the instance can be elected as Raft leader. Even if false, + * the node still can vote, when Raft is enabled. 
+ */ +void +raft_cfg_is_candidate(bool is_candidate); + +/** Configure Raft leader election timeout. */ +void +raft_cfg_election_timeout(double timeout); + +/** + * Configure Raft leader election quorum. There is no a separate option. + * Instead, synchronous replication quorum is used. Since Raft is tightly bound + * with synchronous replication. + */ +void +raft_cfg_election_quorum(void); + +/** + * Configure Raft leader death timeout. I.e. number of seconds without + * heartbeats from the leader to consider it dead. There is no a separate + * option. Raft uses replication timeout for that. + */ +void +raft_cfg_death_timeout(void); + /** * Save complete Raft state into a request to be sent to other instances of the * cluster. It is allowed to save anything here, not only persistent state. diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result index c8974d708..d8969278b 100644 --- a/test/app-tap/init_script.result +++ b/test/app-tap/init_script.result @@ -8,6 +8,9 @@ checkpoint_count:2 checkpoint_interval:3600 checkpoint_wal_threshold:1e+18 coredump:false +election_is_candidate:true +election_is_enabled:false +election_timeout:5 feedback_enabled:true feedback_host:https://feedback.tarantool.io feedback_interval:3600 diff --git a/test/box/admin.result b/test/box/admin.result index d1540a71e..52b62356f 100644 --- a/test/box/admin.result +++ b/test/box/admin.result @@ -37,6 +37,12 @@ cfg_filter(box.cfg) - 1000000000000000000 - - coredump - false + - - election_is_candidate + - true + - - election_is_enabled + - false + - - election_timeout + - 5 - - feedback_enabled - true - - feedback_host diff --git a/test/box/cfg.result b/test/box/cfg.result index fcfc64b22..f19f4bff7 100644 --- a/test/box/cfg.result +++ b/test/box/cfg.result @@ -25,6 +25,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - 
feedback_enabled | - true | - - feedback_host @@ -134,6 +140,12 @@ cfg_filter(box.cfg) | - 1000000000000000000 | - - coredump | - false + | - - election_is_candidate + | - true + | - - election_is_enabled + | - false + | - - election_timeout + | - 5 | - - feedback_enabled | - true | - - feedback_host -- 2.21.1 (Apple Git-122.3)
next prev parent reply other threads:[~2020-09-29 22:11 UTC|newest] Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top 2020-09-29 22:11 [Tarantool-patches] [PATCH v3 00/10] Raft Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 01/10] applier: store instance_id in struct applier Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 10/10] raft: add tests Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 02/10] box: introduce summary RO flag Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 03/10] wal: don't touch box.cfg.wal_dir more than once Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 04/10] replication: track registered replica count Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 05/10] raft: introduce persistent raft state Vladislav Shpilevoy 2020-09-29 22:11 ` Vladislav Shpilevoy [this message] 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 07/10] raft: relay status updates to followers Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 08/10] raft: introduce state machine Vladislav Shpilevoy 2020-09-29 22:11 ` [Tarantool-patches] [PATCH v3 09/10] raft: introduce box.info.election Vladislav Shpilevoy 2020-09-30 7:06 ` [Tarantool-patches] [PATCH v3 00/10] Raft Serge Petrenko 2020-09-30 11:04 ` Kirill Yukhin
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1d329f0bbbf98dff878588b2ad754e2dd18404a7.1601417273.git.v.shpilevoy@tarantool.org \ --to=v.shpilevoy@tarantool.org \ --cc=sergepetrenko@tarantool.org \ --cc=tarantool-patches@dev.tarantool.org \ --subject='Re: [Tarantool-patches] [PATCH v3 06/10] raft: introduce box.cfg.election_* options' \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox