[Tarantool-patches] [PATCH v2 07/11] raft: introduce box.cfg.raft_* options

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Sep 10 02:16:58 MSK 2020


The new options are:

- raft_is_enabled - enable/disable Raft. When disabled, the node
  is supposed to work as if Raft did not exist, i.e. as before;

- raft_is_candidate - a flag telling whether the instance can try
  to become a leader. Note that it can vote for other nodes
  regardless of the value of this option;

- raft_election_timeout - how long to wait for an election to
  end, in seconds.

The options don't do anything yet. They are added separately in
order to keep such mundane changes out of the main Raft commit and
thus simplify its review.

Part of #1146
---
 src/box/box.cc                  | 91 +++++++++++++++++++++++++++++++++
 src/box/box.h                   |  3 ++
 src/box/lua/cfg.cc              | 27 ++++++++++
 src/box/lua/load_cfg.lua        | 15 ++++++
 src/box/raft.c                  | 30 +++++++++++
 src/box/raft.h                  | 35 +++++++++++++
 test/app-tap/init_script.result |  3 ++
 test/box/admin.result           |  6 +++
 test/box/cfg.result             | 12 +++++
 9 files changed, 222 insertions(+)

diff --git a/src/box/box.cc b/src/box/box.cc
index 1a19058b3..7c3c895d2 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name)
 	}
 }
 
+static int
+box_check_raft_is_enabled(void)
+{
+	int b = cfg_getb("raft_is_enabled");
+	if (b < 0) {
+		diag_set(ClientError, ER_CFG, "raft_is_enabled",
+			 "the value must be a boolean");
+	}
+	return b;
+}
+
+static int
+box_check_raft_is_candidate(void)
+{
+	int b = cfg_getb("raft_is_candidate");
+	if (b < 0) {
+		diag_set(ClientError, ER_CFG, "raft_is_candidate",
+			 "the value must be a boolean");
+	}
+	return b;
+}
+
+static double
+box_check_raft_election_timeout(void)
+{
+	double d = cfg_getd("raft_election_timeout");
+	if (d <= 0) {
+		diag_set(ClientError, ER_CFG, "raft_election_timeout",
+			 "the value must be a positive number");
+		return -1;
+	}
+	return d;
+}
+
 static void
 box_check_replication(void)
 {
@@ -729,6 +763,12 @@ box_check_config(void)
 	box_check_uri(cfg_gets("listen"), "listen");
 	box_check_instance_uuid(&uuid);
 	box_check_replicaset_uuid(&uuid);
+	if (box_check_raft_is_enabled() < 0)
+		diag_raise();
+	if (box_check_raft_is_candidate() < 0)
+		diag_raise();
+	if (box_check_raft_election_timeout() < 0)
+		diag_raise();
 	box_check_replication();
 	box_check_replication_timeout();
 	box_check_replication_connect_timeout();
@@ -751,6 +791,36 @@ box_check_config(void)
 		diag_raise();
 }
 
+int
+box_set_raft_is_enabled(void)
+{
+	int b = box_check_raft_is_enabled();
+	if (b < 0)
+		return -1;
+	raft_cfg_is_enabled(b);
+	return 0;
+}
+
+int
+box_set_raft_is_candidate(void)
+{
+	int b = box_check_raft_is_candidate();
+	if (b < 0)
+		return -1;
+	raft_cfg_is_candidate(b);
+	return 0;
+}
+
+int
+box_set_raft_election_timeout(void)
+{
+	double d = box_check_raft_election_timeout();
+	if (d < 0)
+		return -1;
+	raft_cfg_election_timeout(d);
+	return 0;
+}
+
 /*
  * Parse box.cfg.replication and create appliers.
  */
@@ -835,6 +905,7 @@ void
 box_set_replication_timeout(void)
 {
 	replication_timeout = box_check_replication_timeout();
+	raft_cfg_death_timeout();
 }
 
 void
@@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void)
 		return -1;
 	replication_synchro_quorum = value;
 	txn_limbo_on_parameters_change(&txn_limbo);
+	raft_cfg_election_quorum();
 	return 0;
 }
 
@@ -2671,6 +2743,25 @@ box_cfg_xc(void)
 
 	fiber_gc();
 	is_box_configured = true;
+	/*
+	 * Fill in Raft parameters after bootstrap. Before it is not possible -
+	 * there may be Raft data to recover from WAL and snapshot. Also until
+	 * recovery is done, it is not possible to write new records into WAL.
+	 * It is also totally safe, because relaying is not started until the
+	 * box is configured. So it can't happen, that this Raft node will try
+	 * to relay to another Raft node without Raft enabled leading to
+	 * disconnect.
+	 */
+	if (box_set_raft_is_candidate() != 0)
+		diag_raise();
+	if (box_set_raft_election_timeout() != 0)
+		diag_raise();
+	/*
+	 * Raft is enabled last. So as all the parameters are installed by that
+	 * time.
+	 */
+	if (box_set_raft_is_enabled() != 0)
+		diag_raise();
 
 	title("running");
 	say_info("ready to accept requests");
diff --git a/src/box/box.h b/src/box/box.h
index 5988264a5..637d10dd3 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -245,6 +245,9 @@ void box_set_vinyl_memory(void);
 void box_set_vinyl_max_tuple_size(void);
 void box_set_vinyl_cache(void);
 void box_set_vinyl_timeout(void);
+int box_set_raft_is_enabled(void);
+int box_set_raft_is_candidate(void);
+int box_set_raft_election_timeout(void);
 void box_set_replication_timeout(void);
 void box_set_replication_connect_timeout(void);
 void box_set_replication_connect_quorum(void);
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index d481155cd..339b85f9d 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L)
 	return 0;
 }
 
+static int
+lbox_cfg_set_raft_is_enabled(struct lua_State *L)
+{
+	if (box_set_raft_is_enabled() != 0)
+		luaT_error(L);
+	return 0;
+}
+
+static int
+lbox_cfg_set_raft_is_candidate(struct lua_State *L)
+{
+	if (box_set_raft_is_candidate() != 0)
+		luaT_error(L);
+	return 0;
+}
+
+static int
+lbox_cfg_set_raft_election_timeout(struct lua_State *L)
+{
+	if (box_set_raft_election_timeout() != 0)
+		luaT_error(L);
+	return 0;
+}
+
 static int
 lbox_cfg_set_replication_timeout(struct lua_State *L)
 {
@@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L)
 		{"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size},
 		{"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache},
 		{"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout},
+		{"cfg_set_raft_is_enabled", lbox_cfg_set_raft_is_enabled},
+		{"cfg_set_raft_is_candidate", lbox_cfg_set_raft_is_candidate},
+		{"cfg_set_raft_election_timeout", lbox_cfg_set_raft_election_timeout},
 		{"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout},
 		{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
 		{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index 53f572895..2c98fd837 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -86,6 +86,9 @@ local default_cfg = {
     checkpoint_wal_threshold = 1e18,
     checkpoint_count    = 2,
     worker_pool_threads = 4,
+    raft_is_enabled       = false,
+    raft_is_candidate     = true,
+    raft_election_timeout = 5,
     replication_timeout = 1,
     replication_sync_lag = 10,
     replication_sync_timeout = 300,
@@ -163,6 +166,9 @@ local template_cfg = {
     read_only           = 'boolean',
     hot_standby         = 'boolean',
     worker_pool_threads = 'number',
+    raft_is_enabled       = 'boolean',
+    raft_is_candidate     = 'boolean',
+    raft_election_timeout = 'number',
     replication_timeout = 'number',
     replication_sync_lag = 'number',
     replication_sync_timeout = 'number',
@@ -279,6 +285,9 @@ local dynamic_cfg = {
         require('title').update(box.cfg.custom_proc_title)
     end,
     force_recovery          = function() end,
+    raft_is_enabled         = private.cfg_set_raft_is_enabled,
+    raft_is_candidate       = private.cfg_set_raft_is_candidate,
+    raft_election_timeout   = private.cfg_set_raft_election_timeout,
     replication_timeout     = private.cfg_set_replication_timeout,
     replication_connect_timeout = private.cfg_set_replication_connect_timeout,
     replication_connect_quorum = private.cfg_set_replication_connect_quorum,
@@ -333,6 +342,9 @@ local dynamic_cfg_order = {
     -- the new one. This should be fixed when box.cfg is able to
     -- apply some parameters together and atomically.
     replication_anon        = 250,
+    raft_is_enabled         = 300,
+    raft_is_candidate       = 310,
+    raft_election_timeout   = 320,
 }
 
 local function sort_cfg_cb(l, r)
@@ -350,6 +362,9 @@ local dynamic_cfg_skip_at_load = {
     vinyl_cache             = true,
     vinyl_timeout           = true,
     too_long_threshold      = true,
+    raft_is_enabled         = true,
+    raft_is_candidate       = true,
+    raft_election_timeout   = true,
     replication             = true,
     replication_timeout     = true,
     replication_connect_timeout = true,
diff --git a/src/box/raft.c b/src/box/raft.c
index 511fe42f5..ee54d02b7 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -37,6 +37,8 @@
 
 /** Raft state of this instance. */
 struct raft raft = {
+	.is_enabled = false,
+	.is_candidate = false,
 	.term = 1,
 	.vote = 0,
 };
@@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req)
 	req->term = raft.term;
 	req->vote = raft.vote;
 }
+
+void
+raft_cfg_is_enabled(bool is_enabled)
+{
+	raft.is_enabled = is_enabled;
+}
+
+void
+raft_cfg_is_candidate(bool is_candidate)
+{
+	raft.is_candidate = is_candidate;
+}
+
+void
+raft_cfg_election_timeout(double timeout)
+{
+	raft.election_timeout = timeout;
+}
+
+void
+raft_cfg_election_quorum(void)
+{
+}
+
+void
+raft_cfg_death_timeout(void)
+{
+}
diff --git a/src/box/raft.h b/src/box/raft.h
index 31f7becdb..f27222752 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -30,6 +30,7 @@
  * SUCH DAMAGE.
  */
 #include <stdint.h>
+#include <stdbool.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -38,8 +39,11 @@ extern "C" {
 struct raft_request;
 
 struct raft {
+	bool is_enabled;
+	bool is_candidate;
 	uint64_t term;
 	uint32_t vote;
+	double election_timeout;
 };
 
 extern struct raft raft;
@@ -48,6 +52,37 @@ extern struct raft raft;
 void
 raft_process_recovery(const struct raft_request *req);
 
+/** Configure whether Raft is enabled. */
+void
+raft_cfg_is_enabled(bool is_enabled);
+
+/**
+ * Configure whether the instance can be elected as Raft leader. Even if false,
+ * the node still can vote, when Raft is enabled.
+ */
+void
+raft_cfg_is_candidate(bool is_candidate);
+
+/** Configure Raft leader election timeout. */
+void
+raft_cfg_election_timeout(double timeout);
+
+/**
+ * Configure Raft leader election quorum. There is no separate option.
+ * Instead, synchronous replication quorum is used, since Raft is tightly bound
+ * with synchronous replication.
+ */
+void
+raft_cfg_election_quorum(void);
+
+/**
+ * Configure Raft leader death timeout, i.e. the number of seconds without
+ * heartbeats from the leader after which it is considered dead. There is no
+ * separate option. Raft uses replication timeout for that.
+ */
+void
+raft_cfg_death_timeout(void);
+
 /**
  * Save complete Raft state into a request to be sent to other instances of the
  * cluster. It is allowed to save anything here, not only persistent state.
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index 857f0c95f..1d191987a 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -23,6 +23,9 @@ memtx_memory:107374182
 memtx_min_tuple_size:16
 net_msg_max:768
 pid_file:box.pid
+raft_election_timeout:5
+raft_is_candidate:true
+raft_is_enabled:false
 read_only:false
 readahead:16320
 replication_anon:false
diff --git a/test/box/admin.result b/test/box/admin.result
index ab3e80a97..13536a318 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -67,6 +67,12 @@ cfg_filter(box.cfg)
     - 768
   - - pid_file
     - <hidden>
+  - - raft_election_timeout
+    - 5
+  - - raft_is_candidate
+    - true
+  - - raft_is_enabled
+    - false
   - - read_only
     - false
   - - readahead
diff --git a/test/box/cfg.result b/test/box/cfg.result
index bdd210b09..11358b2cd 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -55,6 +55,12 @@ cfg_filter(box.cfg)
  |     - 768
  |   - - pid_file
  |     - <hidden>
+ |   - - raft_election_timeout
+ |     - 5
+ |   - - raft_is_candidate
+ |     - true
+ |   - - raft_is_enabled
+ |     - false
  |   - - read_only
  |     - false
  |   - - readahead
@@ -162,6 +168,12 @@ cfg_filter(box.cfg)
  |     - 768
  |   - - pid_file
  |     - <hidden>
+ |   - - raft_election_timeout
+ |     - 5
+ |   - - raft_is_candidate
+ |     - true
+ |   - - raft_is_enabled
+ |     - false
  |   - - read_only
  |     - false
  |   - - readahead
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list