[Tarantool-patches] [PATCH v3 06/10] raft: introduce box.cfg.election_* options

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Wed Sep 30 01:11:28 MSK 2020


The new options are:

- election_is_enabled - enable/disable leader election (via
  Raft). When disabled, the node is supposed to work as if Raft
  did not exist, i.e. exactly like before;

- election_is_candidate - a flag controlling whether the instance
  can try to become a leader. Note that it can vote for other
  nodes regardless of the value of this option;

- election_timeout - how long to wait for an election to end, in
  seconds.

The options don't do anything now. They are added separately in
order to keep such mundane changes out of the main Raft commit and
thus simplify its review.

Option names don't mention 'Raft' on purpose, because
- Not all users know what Raft is, so they may not even know it
  is related to leader election;
- In the future the algorithm may change from Raft to something
  else, so it is better not to depend on it too much in the public API.

Part of #1146
---
 src/box/box.cc                  | 92 +++++++++++++++++++++++++++++++++
 src/box/box.h                   |  3 ++
 src/box/lua/cfg.cc              | 27 ++++++++++
 src/box/lua/load_cfg.lua        | 15 ++++++
 src/box/raft.c                  | 30 +++++++++++
 src/box/raft.h                  | 35 +++++++++++++
 test/app-tap/init_script.result |  3 ++
 test/box/admin.result           |  6 +++
 test/box/cfg.result             | 12 +++++
 9 files changed, 223 insertions(+)

diff --git a/src/box/box.cc b/src/box/box.cc
index 48fed9b2c..99a15bfd0 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -472,6 +472,40 @@ box_check_uri(const char *source, const char *option_name)
 	}
 }
 
+static int
+box_check_election_is_enabled(void)
+{
+	int b = cfg_getb("election_is_enabled");
+	if (b < 0) {
+		diag_set(ClientError, ER_CFG, "election_is_enabled",
+			 "the value must be a boolean");
+	}
+	return b;
+}
+
+static int
+box_check_election_is_candidate(void)
+{
+	int b = cfg_getb("election_is_candidate");
+	if (b < 0) {
+		diag_set(ClientError, ER_CFG, "election_is_candidate",
+			 "the value must be a boolean");
+	}
+	return b;
+}
+
+static double
+box_check_election_timeout(void)
+{
+	double d = cfg_getd("election_timeout");
+	if (d <= 0) {
+		diag_set(ClientError, ER_CFG, "election_timeout",
+			 "the value must be a positive number");
+		return -1;
+	}
+	return d;
+}
+
 static void
 box_check_replication(void)
 {
@@ -729,6 +763,12 @@ box_check_config(void)
 	box_check_uri(cfg_gets("listen"), "listen");
 	box_check_instance_uuid(&uuid);
 	box_check_replicaset_uuid(&uuid);
+	if (box_check_election_is_enabled() < 0)
+		diag_raise();
+	if (box_check_election_is_candidate() < 0)
+		diag_raise();
+	if (box_check_election_timeout() < 0)
+		diag_raise();
 	box_check_replication();
 	box_check_replication_timeout();
 	box_check_replication_connect_timeout();
@@ -751,6 +791,36 @@ box_check_config(void)
 		diag_raise();
 }
 
+int
+box_set_election_is_enabled(void)
+{
+	int b = box_check_election_is_enabled();
+	if (b < 0)
+		return -1;
+	raft_cfg_is_enabled(b);
+	return 0;
+}
+
+int
+box_set_election_is_candidate(void)
+{
+	int b = box_check_election_is_candidate();
+	if (b < 0)
+		return -1;
+	raft_cfg_is_candidate(b);
+	return 0;
+}
+
+int
+box_set_election_timeout(void)
+{
+	double d = box_check_election_timeout();
+	if (d < 0)
+		return -1;
+	raft_cfg_election_timeout(d);
+	return 0;
+}
+
 /*
  * Parse box.cfg.replication and create appliers.
  */
@@ -835,6 +905,7 @@ void
 box_set_replication_timeout(void)
 {
 	replication_timeout = box_check_replication_timeout();
+	raft_cfg_death_timeout();
 }
 
 void
@@ -865,6 +936,7 @@ box_set_replication_synchro_quorum(void)
 		return -1;
 	replication_synchro_quorum = value;
 	txn_limbo_on_parameters_change(&txn_limbo);
+	raft_cfg_election_quorum();
 	return 0;
 }
 
@@ -2686,6 +2758,26 @@ box_cfg_xc(void)
 
 	fiber_gc();
 	is_box_configured = true;
+	/*
+	 * Fill in leader election parameters after bootstrap. It is not
+	 * possible earlier - there may be relevant data to recover from WAL
+	 * and snapshot. Also, until recovery is done, it is not possible to
+	 * write new records into WAL. It is also totally safe, because
+	 * relaying is not started until the box is configured. So it can't
+	 * happen that this election-enabled node tries to relay to another
+	 * election-enabled node without election actually being enabled,
+	 * which would lead to a disconnect.
+	 */
+	if (box_set_election_is_candidate() != 0)
+		diag_raise();
+	if (box_set_election_timeout() != 0)
+		diag_raise();
+	/*
+	 * Election is enabled last, so that all the other parameters are
+	 * already installed by that time.
+	 */
+	if (box_set_election_is_enabled() != 0)
+		diag_raise();
 
 	title("running");
 	say_info("ready to accept requests");
diff --git a/src/box/box.h b/src/box/box.h
index 5988264a5..45ff8bbbf 100644
--- a/src/box/box.h
+++ b/src/box/box.h
@@ -245,6 +245,9 @@ void box_set_vinyl_memory(void);
 void box_set_vinyl_max_tuple_size(void);
 void box_set_vinyl_cache(void);
 void box_set_vinyl_timeout(void);
+int box_set_election_is_enabled(void);
+int box_set_election_is_candidate(void);
+int box_set_election_timeout(void);
 void box_set_replication_timeout(void);
 void box_set_replication_connect_timeout(void);
 void box_set_replication_connect_quorum(void);
diff --git a/src/box/lua/cfg.cc b/src/box/lua/cfg.cc
index d481155cd..bbb92f038 100644
--- a/src/box/lua/cfg.cc
+++ b/src/box/lua/cfg.cc
@@ -269,6 +269,30 @@ lbox_cfg_set_worker_pool_threads(struct lua_State *L)
 	return 0;
 }
 
+static int
+lbox_cfg_set_election_is_enabled(struct lua_State *L)
+{
+	if (box_set_election_is_enabled() != 0)
+		luaT_error(L);
+	return 0;
+}
+
+static int
+lbox_cfg_set_election_is_candidate(struct lua_State *L)
+{
+	if (box_set_election_is_candidate() != 0)
+		luaT_error(L);
+	return 0;
+}
+
+static int
+lbox_cfg_set_election_timeout(struct lua_State *L)
+{
+	if (box_set_election_timeout() != 0)
+		luaT_error(L);
+	return 0;
+}
+
 static int
 lbox_cfg_set_replication_timeout(struct lua_State *L)
 {
@@ -382,6 +406,9 @@ box_lua_cfg_init(struct lua_State *L)
 		{"cfg_set_vinyl_max_tuple_size", lbox_cfg_set_vinyl_max_tuple_size},
 		{"cfg_set_vinyl_cache", lbox_cfg_set_vinyl_cache},
 		{"cfg_set_vinyl_timeout", lbox_cfg_set_vinyl_timeout},
+		{"cfg_set_election_is_enabled", lbox_cfg_set_election_is_enabled},
+		{"cfg_set_election_is_candidate", lbox_cfg_set_election_is_candidate},
+		{"cfg_set_election_timeout", lbox_cfg_set_election_timeout},
 		{"cfg_set_replication_timeout", lbox_cfg_set_replication_timeout},
 		{"cfg_set_replication_connect_quorum", lbox_cfg_set_replication_connect_quorum},
 		{"cfg_set_replication_connect_timeout", lbox_cfg_set_replication_connect_timeout},
diff --git a/src/box/lua/load_cfg.lua b/src/box/lua/load_cfg.lua
index 92347a9fd..d558e7ac9 100644
--- a/src/box/lua/load_cfg.lua
+++ b/src/box/lua/load_cfg.lua
@@ -87,6 +87,9 @@ local default_cfg = {
     checkpoint_wal_threshold = 1e18,
     checkpoint_count    = 2,
     worker_pool_threads = 4,
+    election_is_enabled = false,
+    election_is_candidate = true,
+    election_timeout    = 5,
     replication_timeout = 1,
     replication_sync_lag = 10,
     replication_sync_timeout = 300,
@@ -165,6 +168,9 @@ local template_cfg = {
     hot_standby         = 'boolean',
     memtx_use_mvcc_engine = 'boolean',
     worker_pool_threads = 'number',
+    election_is_enabled = 'boolean',
+    election_is_candidate = 'boolean',
+    election_timeout    = 'number',
     replication_timeout = 'number',
     replication_sync_lag = 'number',
     replication_sync_timeout = 'number',
@@ -281,6 +287,9 @@ local dynamic_cfg = {
         require('title').update(box.cfg.custom_proc_title)
     end,
     force_recovery          = function() end,
+    election_is_enabled     = private.cfg_set_election_is_enabled,
+    election_is_candidate   = private.cfg_set_election_is_candidate,
+    election_timeout        = private.cfg_set_election_timeout,
     replication_timeout     = private.cfg_set_replication_timeout,
     replication_connect_timeout = private.cfg_set_replication_connect_timeout,
     replication_connect_quorum = private.cfg_set_replication_connect_quorum,
@@ -335,6 +344,9 @@ local dynamic_cfg_order = {
     -- the new one. This should be fixed when box.cfg is able to
     -- apply some parameters together and atomically.
     replication_anon        = 250,
+    election_is_enabled     = 300,
+    election_is_candidate   = 310,
+    election_timeout        = 320,
 }
 
 local function sort_cfg_cb(l, r)
@@ -352,6 +364,9 @@ local dynamic_cfg_skip_at_load = {
     vinyl_cache             = true,
     vinyl_timeout           = true,
     too_long_threshold      = true,
+    election_is_enabled     = true,
+    election_is_candidate   = true,
+    election_timeout        = true,
     replication             = true,
     replication_timeout     = true,
     replication_connect_timeout = true,
diff --git a/src/box/raft.c b/src/box/raft.c
index 511fe42f5..ee54d02b7 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -37,6 +37,8 @@
 
 /** Raft state of this instance. */
 struct raft raft = {
+	.is_enabled = false,
+	.is_candidate = false,
 	.term = 1,
 	.vote = 0,
 };
@@ -63,3 +65,31 @@ raft_serialize_for_disk(struct raft_request *req)
 	req->term = raft.term;
 	req->vote = raft.vote;
 }
+
+void
+raft_cfg_is_enabled(bool is_enabled)
+{
+	raft.is_enabled = is_enabled;
+}
+
+void
+raft_cfg_is_candidate(bool is_candidate)
+{
+	raft.is_candidate = is_candidate;
+}
+
+void
+raft_cfg_election_timeout(double timeout)
+{
+	raft.election_timeout = timeout;
+}
+
+void
+raft_cfg_election_quorum(void)
+{
+}
+
+void
+raft_cfg_death_timeout(void)
+{
+}
diff --git a/src/box/raft.h b/src/box/raft.h
index 31f7becdb..f27222752 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -30,6 +30,7 @@
  * SUCH DAMAGE.
  */
 #include <stdint.h>
+#include <stdbool.h>
 
 #if defined(__cplusplus)
 extern "C" {
@@ -38,8 +39,11 @@ extern "C" {
 struct raft_request;
 
 struct raft {
+	bool is_enabled;
+	bool is_candidate;
 	uint64_t term;
 	uint32_t vote;
+	double election_timeout;
 };
 
 extern struct raft raft;
@@ -48,6 +52,37 @@ extern struct raft raft;
 void
 raft_process_recovery(const struct raft_request *req);
 
+/** Configure whether Raft is enabled. */
+void
+raft_cfg_is_enabled(bool is_enabled);
+
+/**
+ * Configure whether the instance can be elected as Raft leader. Even if false,
+ * the node still can vote, when Raft is enabled.
+ */
+void
+raft_cfg_is_candidate(bool is_candidate);
+
+/** Configure Raft leader election timeout. */
+void
+raft_cfg_election_timeout(double timeout);
+
+/**
+ * Configure Raft leader election quorum. There is no separate option for it.
+ * Instead, the synchronous replication quorum is used, since Raft is tightly
+ * bound to synchronous replication.
+ */
+void
+raft_cfg_election_quorum(void);
+
+/**
+ * Configure Raft leader death timeout, i.e. the number of seconds without
+ * heartbeats from the leader after which it is considered dead. There is no
+ * separate option for it: Raft uses the replication timeout for that.
+ */
+void
+raft_cfg_death_timeout(void);
+
 /**
  * Save complete Raft state into a request to be sent to other instances of the
  * cluster. It is allowed to save anything here, not only persistent state.
diff --git a/test/app-tap/init_script.result b/test/app-tap/init_script.result
index c8974d708..d8969278b 100644
--- a/test/app-tap/init_script.result
+++ b/test/app-tap/init_script.result
@@ -8,6 +8,9 @@ checkpoint_count:2
 checkpoint_interval:3600
 checkpoint_wal_threshold:1e+18
 coredump:false
+election_is_candidate:true
+election_is_enabled:false
+election_timeout:5
 feedback_enabled:true
 feedback_host:https://feedback.tarantool.io
 feedback_interval:3600
diff --git a/test/box/admin.result b/test/box/admin.result
index d1540a71e..52b62356f 100644
--- a/test/box/admin.result
+++ b/test/box/admin.result
@@ -37,6 +37,12 @@ cfg_filter(box.cfg)
     - 1000000000000000000
   - - coredump
     - false
+  - - election_is_candidate
+    - true
+  - - election_is_enabled
+    - false
+  - - election_timeout
+    - 5
   - - feedback_enabled
     - true
   - - feedback_host
diff --git a/test/box/cfg.result b/test/box/cfg.result
index fcfc64b22..f19f4bff7 100644
--- a/test/box/cfg.result
+++ b/test/box/cfg.result
@@ -25,6 +25,12 @@ cfg_filter(box.cfg)
  |     - 1000000000000000000
  |   - - coredump
  |     - false
+ |   - - election_is_candidate
+ |     - true
+ |   - - election_is_enabled
+ |     - false
+ |   - - election_timeout
+ |     - 5
  |   - - feedback_enabled
  |     - true
  |   - - feedback_host
@@ -134,6 +140,12 @@ cfg_filter(box.cfg)
  |     - 1000000000000000000
  |   - - coredump
  |     - false
+ |   - - election_is_candidate
+ |     - true
+ |   - - election_is_enabled
+ |     - false
+ |   - - election_timeout
+ |     - 5
  |   - - feedback_enabled
  |     - true
  |   - - feedback_host
-- 
2.21.1 (Apple Git-122.3)



More information about the Tarantool-patches mailing list