[Tarantool-patches] [PATCH v2 06/10] raft: introduce box.cfg.raft_* options

Serge Petrenko sergepetrenko at tarantool.org
Fri Sep 4 12:07:51 MSK 2020


04.09.2020 01:51, Vladislav Shpilevoy пишет:
> The new options are:
>
> - raft_is_enabled - enable/disable Raft. When disabled, the node
>    is supposed to work like if Raft does not exist. Like earlier;
>
> - raft_is_candidate - a flag whether the instance can try to
>    become a leader. Note, it can vote for other nodes regardless of
>    value of this option;
>
> - raft_election_timeout - how long need to wait until election
>    end, in seconds.
>
> The options don't do anything now. They are added separately in
> order to keep such mundane changes from the main Raft commit, to
> simplify its review.
>
> Part of #1146


Thanks for the patch!

Please see my comment below.

> ---
>   src/box/box.cc                  | 91 +++++++++++++++++++++++++++++++++
>   src/box/box.h                   |  3 ++
>   src/box/lua/cfg.cc              | 27 ++++++++++
>   src/box/lua/load_cfg.lua        | 15 ++++++
>   src/box/raft.c                  | 30 +++++++++++
>   src/box/raft.h                  | 35 +++++++++++++
>   test/app-tap/init_script.result |  3 ++
>   test/box/admin.result           |  6 +++
>   test/box/cfg.result             | 12 +++++
>   9 files changed, 222 insertions(+)
>
> diff --git a/src/box/box.cc b/src/box/box.cc
> index 281917af2..5f04a1a78 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name)
>   	}
>   }
>   
> +static int
> +box_check_raft_is_enabled(void)
> +{
> +	int b = cfg_getb("raft_is_enabled");
> +	if (b < 0) {
> +		diag_set(ClientError, ER_CFG, "raft_is_enabled",
> +			 "the value must be a boolean");
> +	}
> +	return b;
> +}
> +
> +static int
> +box_check_raft_is_candidate(void)
> +{
> +	int b = cfg_getb("raft_is_candidate");
> +	if (b < 0) {
> +		diag_set(ClientError, ER_CFG, "raft_is_candidate",
> +			 "the value must be a boolean");
> +	}
> +	return b;
> +}
> +
> +static double
> +box_check_raft_election_timeout(void)
> +{
> +	double d = cfg_getd("raft_election_timeout");
> +	if (d == 0) {

Should be "d <= 0" here?

Otherwise you end up with a diag_raise without appropriate diag_set
when raft_election_timeout is negative.


> +		diag_set(ClientError, ER_CFG, "raft_election_timeout",
> +			 "the value must be a positive number");
> +		return -1;
> +	}
> +	return d;
> +}
> +
>   static void
>   box_check_replication(void)
>   {
> @@ -729,6 +763,12 @@ box_check_config(void)
>   	box_check_uri(cfg_gets("listen"), "listen");
>   	box_check_instance_uuid(&uuid);
>   	box_check_replicaset_uuid(&uuid);
> +	if (box_check_raft_is_enabled() < 0)
> +		diag_raise();
> +	if (box_check_raft_is_candidate() < 0)
> +		diag_raise();
> +	if (box_check_raft_election_timeout() < 0)
> +		diag_raise();
>   	box_check_replication();
>   	box_check_replication_timeout();
>   	box_check_replication_connect_timeout();
> @@ -751,6 +791,36 @@ box_check_config(void)
>   		diag_raise();
>   }
>   
> +int
> +box_set_raft_is_enabled(void)
> +{
> +	int b = box_check_raft_is_enabled();
> +	if (b < 0)
> +		return -1;
> +	raft_cfg_is_enabled(b);
> +	return 0;
> +}
> +
> +int
> +box_set_raft_is_candidate(void)
> +{
> +	int b = box_check_raft_is_candidate();
> +	if (b < 0)
> +		return -1;
> +	raft_cfg_is_candidate(b);
> +	return 0;
> +}
> +
> +int
> +box_set_raft_election_timeout(void)
> +{
> +	double d = box_check_raft_election_timeout();
> +	if (d < 0)
> +		return -1;
> +	raft_cfg_election_timeout(d);
> +	return 0;
> +}
> +
>   /*
>    * Parse box.cfg.replication and create appliers.
>    */
> @@ -835,6 +905,7 @@ void
>   box_set_replication_timeout(void)
>   {
>   	replication_timeout = box_check_replication_timeout();
> +	raft_cfg_death_timeout();
>   }
>   
>   void
> @@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void)
>   		return -1;
>   	replication_synchro_quorum = value;
>   	txn_limbo_on_parameters_change(&txn_limbo);
> +	raft_cfg_election_quorum();
>   	return 0;
>   }
>   
> @@ -2680,6 +2752,25 @@ box_cfg_xc(void)
>   
>   	fiber_gc();
>   	is_box_configured = true;
> +	/*
> +	 * Fill in Raft parameters after bootstrap. Before it is not possible -
> +	 * there may be Raft data to recover from WAL and snapshot. Also until
> +	 * recovery is done, it is not possible to write new records into WAL.
> +	 * It is also totally safe, because relaying is not started until the
> +	 * box is configured. So it can't happen, that this Raft node will try
> +	 * to relay to another Raft node without Raft enabled leading to
> +	 * disconnect.
> +	 */
> +	if (box_set_raft_is_candidate() != 0)
> +		diag_raise();
> +	if (box_set_raft_election_timeout() != 0)
> +		diag_raise();
> +	/*
> +	 * Raft is enabled last. So as all the parameters are installed by that
> +	 * time.
> +	 */
> +	if (box_set_raft_is_enabled() != 0)
> +		diag_raise();
>   
>   	title("running");
>   	say_info("ready to accept requests");
> diff --git a/src/box/box.h b/src/box/box.h
> index 5988264a5..637d10dd3 100644
> --- a/src/box/box.h
> +++ b/src/box/box.h
> @@ -245,6 +245,9 @@ void box_set_vinyl_memory(void);
>   void box_set_vinyl_max_tuple_size(void);
>   void box_set_vinyl_cache(void);
>   void box_set_vinyl_timeout(void);
> +int box_set_raft_is_enabled(void);
> +int box_set_raft_is_candidate(void);
> +int box_set_raft_election_timeout(void);
>   void box_set_replication_timeout(void);
>   void box_set_replication_connect_timeout(void);
>   void box_set_replication_connect_quorum(void);
> diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
> index d481155cd..339b85f9d 100644
> --- a/src/box/lua/cfg.cc
> +++ b/src/box/lua/cfg.cc
> @@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L)
>   	return 0;
>   }
>   
> +static int
> +lbox_cfg_set_raft_is_enabled(struct lua_State *L)
> +{
> +	if (box_set_raft_is_enabled() != 0)
> +		luaT_error(L);
> +	return 0;
> +}
> +
> +static int
> +lbox_cfg_set_raft_is_candidate(struct lua_State *L)
> +{
> +	if (box_set_raft_is_candidate() != 0)
> +		luaT_error(L);
> +	return 0;
> +}
> +
> +static int
> +lbox_cfg_set_raft_election_timeout(struct lua_State *L)
> +{
> +	if (box_set_raft_election_timeout() != 0)
> +		luaT_error(L);
> +	return 0;
> +}
> +
>   static int
>   lbox_cfg_set_replication_timeout(struct lua_State *L)
>   {
> @@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L)
>   		{"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size},
>   		{"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache},
>   		{"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout},
> +		{"cfg_set_raft_is_enabled", lbox_cfg_set_raft_is_enabled},
> +		{"cfg_set_raft_is_candidate", lbox_cfg_set_raft_is_candidate},
> +		{"cfg_set_raft_election_timeout", lbox_cfg_set_raft_election_timeout},
>   		{"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout},
>   		{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
>   		{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
> diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
> index 53f572895..2c98fd837 100644
> --- a/src/box/lua/load_cfg.lua
> +++ b/src/box/lua/load_cfg.lua
> @@ -86,6 +86,9 @@ local default_cfg = {
>       checkpoint_wal_threshold = 1e18,
>       checkpoint_count    = 2,
>       worker_pool_threads = 4,
> +    raft_is_enabled       = false,
> +    raft_is_candidate     = true,
> +    raft_election_timeout = 5,
>       replication_timeout = 1,
>       replication_sync_lag = 10,
>       replication_sync_timeout = 300,
> @@ -163,6 +166,9 @@ local template_cfg = {
>       read_only           = 'boolean',
>       hot_standby         = 'boolean',
>       worker_pool_threads = 'number',
> +    raft_is_enabled       = 'boolean',
> +    raft_is_candidate     = 'boolean',
> +    raft_election_timeout = 'number',
>       replication_timeout = 'number',
>       replication_sync_lag = 'number',
>       replication_sync_timeout = 'number',
> @@ -279,6 +285,9 @@ local dynamic_cfg = {
>           require('title').update(box.cfg.custom_proc_title)
>       end,
>       force_recovery          = function() end,
> +    raft_is_enabled         = private.cfg_set_raft_is_enabled,
> +    raft_is_candidate       = private.cfg_set_raft_is_candidate,
> +    raft_election_timeout   = private.cfg_set_raft_election_timeout,
>       replication_timeout     = private.cfg_set_replication_timeout,
>       replication_connect_timeout = private.cfg_set_replication_connect_timeout,
>       replication_connect_quorum = private.cfg_set_replication_connect_quorum,
> @@ -333,6 +342,9 @@ local dynamic_cfg_order = {
>       -- the new one. This should be fixed when box.cfg is able to
>       -- apply some parameters together and atomically.
>       replication_anon        = 250,
> +    raft_is_enabled         = 300,
> +    raft_is_candidate       = 310,
> +    raft_election_timeout   = 320,
>   }
>   
>   local function sort_cfg_cb(l, r)
> @@ -350,6 +362,9 @@ local dynamic_cfg_skip_at_load = {
>       vinyl_cache             = true,
>       vinyl_timeout           = true,
>       too_long_threshold      = true,
> +    raft_is_enabled         = true,
> +    raft_is_candidate       = true,
> +    raft_election_timeout   = true,
>       replication             = true,
>       replication_timeout     = true,
>       replication_connect_timeout = true,
> diff --git a/src/box/raft.c b/src/box/raft.c
> index 511fe42f5..ee54d02b7 100644
> --- a/src/box/raft.c
> +++ b/src/box/raft.c
> @@ -37,6 +37,8 @@
>   
>   /** Raft state of this instance. */
>   struct raft raft = {
> +	.is_enabled = false,
> +	.is_candidate = false,
>   	.term = 1,
>   	.vote = 0,
>   };
> @@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req)
>   	req->term = raft.term;
>   	req->vote = raft.vote;
>   }
> +
> +void
> +raft_cfg_is_enabled(bool is_enabled)
> +{
> +	raft.is_enabled = is_enabled;
> +}
> +
> +void
> +raft_cfg_is_candidate(bool is_candidate)
> +{
> +	raft.is_candidate = is_candidate;
> +}
> +
> +void
> +raft_cfg_election_timeout(double timeout)
> +{
> +	raft.election_timeout = timeout;
> +}
> +
> +void
> +raft_cfg_election_quorum(void)
> +{
> +}
> +
> +void
> +raft_cfg_death_timeout(void)
> +{
> +}
> diff --git a/src/box/raft.h b/src/box/raft.h
> index 31f7becdb..f27222752 100644
> --- a/src/box/raft.h
> +++ b/src/box/raft.h
> @@ -30,6 +30,7 @@
>    * SUCH DAMAGE.
>    */
>   #include <stdint.h>
> +#include <stdbool.h>
>   
>   #if defined(__cplusplus)
>   extern "C" {
> @@ -38,8 +39,11 @@ extern "C" {
>   struct raft_request;
>   
>   struct raft {
> +	bool is_enabled;
> +	bool is_candidate;
>   	uint64_t term;
>   	uint32_t vote;
> +	double election_timeout;
>   };
>   
>   extern struct raft raft;
> @@ -48,6 +52,37 @@ extern struct raft raft;
>   void
>   raft_process_recovery(const struct raft_request *req);
>   
> +/** Configure whether Raft is enabled. */
> +void
> +raft_cfg_is_enabled(bool is_enabled);
> +
> +/**
> + * Configure whether the instance can be elected as Raft leader. Even if false,
> + * the node still can vote, when Raft is enabled.
> + */
> +void
> +raft_cfg_is_candidate(bool is_candidate);
> +
> +/** Configure Raft leader election timeout. */
> +void
> +raft_cfg_election_timeout(double timeout);
> +
> +/**
> + * Configure Raft leader election quorum. There is no a separate option.
> + * Instead, synchronous replication quorum is used. Since Raft is tightly bound
> + * with synchronous replication.
> + */
> +void
> +raft_cfg_election_quorum(void);
> +
> +/**
> + * Configure Raft leader death timeout. I.e. number of seconds without
> + * heartbeats from the leader to consider it dead. There is no a separate
> + * option. Raft uses replication timeout for that.
> + */
> +void
> +raft_cfg_death_timeout(void);
> +
>   /**
>    * Save complete Raft state into a request to be sent to other instances of the
>    * cluster. It is allowed to save anything here, not only persistent state.
> diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
> index 857f0c95f..1d191987a 100644
> --- a/test/app-tap/init_script.result
> +++ b/test/app-tap/init_script.result
> @@ -23,6 +23,9 @@ memtx_memory:107374182
>   memtx_min_tuple_size:16
>   net_msg_max:768
>   pid_file:box.pid
> +raft_election_timeout:5
> +raft_is_candidate:true
> +raft_is_enabled:false
>   read_only:false
>   readahead:16320
>   replication_anon:false
> diff --git a/test/box/admin.result b/test/box/admin.result
> index ab3e80a97..13536a318 100644
> --- a/test/box/admin.result
> +++ b/test/box/admin.result
> @@ -67,6 +67,12 @@ cfg_filter(box.cfg)
>       - 768
>     - - pid_file
>       - <hidden>
> +  - - raft_election_timeout
> +    - 5
> +  - - raft_is_candidate
> +    - true
> +  - - raft_is_enabled
> +    - false
>     - - read_only
>       - false
>     - - readahead
> diff --git a/test/box/cfg.result b/test/box/cfg.result
> index bdd210b09..11358b2cd 100644
> --- a/test/box/cfg.result
> +++ b/test/box/cfg.result
> @@ -55,6 +55,12 @@ cfg_filter(box.cfg)
>    |     - 768
>    |   - - pid_file
>    |     - <hidden>
> + |   - - raft_election_timeout
> + |     - 5
> + |   - - raft_is_candidate
> + |     - true
> + |   - - raft_is_enabled
> + |     - false
>    |   - - read_only
>    |     - false
>    |   - - readahead
> @@ -162,6 +168,12 @@ cfg_filter(box.cfg)
>    |     - 768
>    |   - - pid_file
>    |     - <hidden>
> + |   - - raft_election_timeout
> + |     - 5
> + |   - - raft_is_candidate
> + |     - true
> + |   - - raft_is_enabled
> + |     - false
>    |   - - read_only
>    |     - false
>    |   - - readahead

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list