[Tarantool-patches] [PATCH v2 3/5] raft: replace raft_start_candidate with _promote

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sun Jul 18 19:53:27 MSK 2021


raft_state_candiate() made the instance a candidate until it was
reconfigured or raft_stop_candidate() was called. It was used
during promotion of a 'manual' instance.

This however was relatively hard to use - raft_stop_candidate()
had a flag 'do_demote' which was passed as true only by Raft
internally, and it also wasn't very consistent with box.promote()
which works for one term only.

One of the next patches is going to need Raft promotion again but
for just one term. In order not to use raft_start/stop_leader()
again, this patch replaces them with raft_promote() and
raft_restore().

raft_promote() works very similar to box.promote() - it bumps the
term and makes the instance a candidate for this term. If it wins,
it stays a leader until next term happens.

Otherwise either a new term is started and the node stops being a
leader, or another node becomes a leader. In the former case
raft_promote() might be called again to retry.

Replacement couldn't be done independently from box/ changes,
because raft_start/stop_candidate couldn't be kept - any term
bump runs raft_restore() which is the same as
raft_stop_candidate(do_demote=true), and that makes the old
functions useless. Hence the patch also includes box/ changes.

Part of #6018
---
 src/box/box.cc              | 27 +++-------
 src/box/errcode.h           |  2 +-
 src/box/raft.c              | 58 ++++++++++++++++++----
 src/box/raft.h              |  4 +-
 src/lib/raft/raft.c         | 98 ++++++++++++++++++++-----------------
 src/lib/raft/raft.h         | 13 +++--
 test/box/error.result       |  2 +-
 test/unit/raft.c            | 41 +++++++++-------
 test/unit/raft.result       | 22 +++++----
 test/unit/raft_test_utils.c | 17 +++----
 test/unit/raft_test_utils.h | 16 ++----
 11 files changed, 169 insertions(+), 131 deletions(-)

diff --git a/src/box/box.cc b/src/box/box.cc
index 673f6bed6..b828d3a31 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1595,25 +1595,14 @@ box_promote(void)
 		 * Make this instance a candidate and run until some leader, not
 		 * necessarily this instance, emerges.
 		 */
-		raft_start_candidate(raft);
-		/*
-		 * Trigger new elections without waiting for an old leader to
-		 * disappear.
-		 */
-		raft_new_term(raft);
-		rc = box_raft_wait_leader_found();
-		/*
-		 * Do not reset raft mode if it was changed while running the
-		 * elections.
-		 */
-		if (box_election_mode == ELECTION_MODE_MANUAL)
-			raft_stop_candidate(raft, false);
-		if (rc != 0)
-			return -1;
-		if (!raft->is_enabled) {
-			diag_set(ClientError, ER_RAFT_DISABLED);
-			return -1;
-		}
+		do {
+			raft_promote(raft);
+			rc = box_raft_wait_term_outcome();
+			if (rc != 0) {
+				raft_restore(raft);
+				return -1;
+			}
+		} while (raft->leader == 0);
 		if (raft->state != RAFT_STATE_LEADER) {
 			diag_set(ClientError, ER_INTERFERING_PROMOTE,
 				 raft->leader);
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 49aec4bf6..d2854677f 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -276,7 +276,7 @@ struct errcode_record {
 	/*221 */_(ER_SQL_CANT_ADD_AUTOINC,	"Can't add AUTOINCREMENT: space %s can't feature more than one AUTOINCREMENT field") \
 	/*222 */_(ER_QUORUM_WAIT,		"Couldn't wait for quorum %d: %s") \
 	/*223 */_(ER_INTERFERING_PROMOTE,	"Instance with replica id %u was promoted first") \
-	/*224 */_(ER_RAFT_DISABLED,		"Elections were turned off while running box.ctl.promote()")\
+	/*224 */_(ER_ELECTION_DISABLED,		"Elections were turned off")\
 	/*225 */_(ER_TXN_ROLLBACK,		"Transaction was rolled back") \
 
 /*
diff --git a/src/box/raft.c b/src/box/raft.c
index 7f787c0c5..eb62e9630 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -327,30 +327,70 @@ fail:
 	panic("Could not write a raft request to WAL\n");
 }
 
+/**
+ * Context of waiting for a Raft term outcome. Which is either a leader is
+ * elected, or a new term starts, or Raft is disabled.
+ */
+struct box_raft_watch_ctx {
+	bool is_done;
+	uint64_t term;
+	struct fiber *owner;
+};
+
 static int
-box_raft_wait_leader_found_f(struct trigger *trig, void *event)
+box_raft_wait_term_outcome_f(struct trigger *trig, void *event)
 {
 	struct raft *raft = event;
 	assert(raft == box_raft());
-	struct fiber *waiter = trig->data;
-	if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
-		fiber_wakeup(waiter);
+	struct box_raft_watch_ctx *ctx = trig->data;
+	/*
+	 * Term ended with nothing, probably split vote which led to a next
+	 * term.
+	 */
+	if (raft->volatile_term > ctx->term)
+		goto done;
+	/* Instance does not participate in terms anymore. */
+	if (!raft->is_enabled)
+		goto done;
+	/* The term ended with a leader being found. */
+	if (raft->leader != REPLICA_ID_NIL)
+		goto done;
+	/* The term still continues with no resolution. */
+	return 0;
+done:
+	ctx->is_done = true;
+	fiber_wakeup(ctx->owner);
 	return 0;
 }
 
 int
-box_raft_wait_leader_found(void)
+box_raft_wait_term_outcome(void)
 {
+	struct raft *raft = box_raft();
 	struct trigger trig;
-	trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
-	raft_on_update(box_raft(), &trig);
-	fiber_yield();
+	struct box_raft_watch_ctx ctx = {
+		.is_done = false,
+		.term = raft->volatile_term,
+		.owner = fiber(),
+	};
+	trigger_create(&trig, box_raft_wait_term_outcome_f, &ctx, NULL);
+	raft_on_update(raft, &trig);
+	/*
+	 * XXX: it is not a good idea not to have a timeout here. If all nodes
+	 * are voters, the term might never end with any result nor bump to a
+	 * new value.
+	 */
+	while (!fiber_is_cancelled() && !ctx.is_done)
+		fiber_yield();
 	trigger_clear(&trig);
 	if (fiber_is_cancelled()) {
 		diag_set(FiberIsCancelled);
 		return -1;
 	}
-	assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
+	if (!raft->is_enabled) {
+		diag_set(ClientError, ER_ELECTION_DISABLED);
+		return -1;
+	}
 	return 0;
 }
 
diff --git a/src/box/raft.h b/src/box/raft.h
index 6b6136510..d55e90d2f 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -97,9 +97,9 @@ box_raft_checkpoint_remote(struct raft_request *req);
 int
 box_raft_process(struct raft_request *req, uint32_t source);
 
-/** Block this fiber until Raft leader is known. */
+/** Block this fiber until the current term's outcome is known. */
 int
-box_raft_wait_leader_found();
+box_raft_wait_term_outcome(void);
 
 void
 box_raft_init(void);
diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
index ef11ef89f..24bf89dda 100644
--- a/src/lib/raft/raft.c
+++ b/src/lib/raft/raft.c
@@ -663,6 +663,11 @@ raft_sm_schedule_new_term(struct raft *raft, uint64_t new_term)
 	raft->volatile_vote = 0;
 	raft->leader = 0;
 	raft->state = RAFT_STATE_FOLLOWER;
+	/*
+	 * The instance could be promoted for the previous term. But promotion
+	 * has no effect on following terms.
+	 */
+	raft_restore(raft);
 	raft_sm_pause_and_dump(raft);
 	/*
 	 * State is visible and it is changed - broadcast. Term is also visible,
@@ -851,30 +856,15 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
 		raft_sm_start(raft);
 }
 
-void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
-{
-	raft->is_cfg_candidate = is_candidate;
-	is_candidate = is_candidate && raft->is_enabled;
-	if (is_candidate)
-		raft_start_candidate(raft);
-	else
-		raft_stop_candidate(raft, true);
-}
-
-void
+/** Make the instance a candidate. */
+static void
 raft_start_candidate(struct raft *raft)
 {
+	assert(raft->is_enabled);
 	if (raft->is_candidate)
 		return;
+	assert(raft->state == RAFT_STATE_FOLLOWER);
 	raft->is_candidate = true;
-	assert(raft->state != RAFT_STATE_CANDIDATE);
-	/*
-	 * May still be the leader after raft_stop_candidate
-	 * with do_demote = false.
-	 */
-	if (raft->state == RAFT_STATE_LEADER)
-		return;
 	if (raft->is_write_in_progress) {
 		/*
 		 * If there is an on-going WAL write, it means there was
@@ -889,35 +879,53 @@ raft_start_candidate(struct raft *raft)
 	}
 }
 
-void
-raft_stop_candidate(struct raft *raft, bool do_demote)
+/**
+ * Make the instance stop taking part in new elections and demote if it was a
+ * leader.
+ */
+static void
+raft_stop_candidate(struct raft *raft)
 {
-	/*
-	 * May still be the leader after raft_stop_candidate
-	 * with do_demote = false.
-	 */
-	if (!raft->is_candidate && raft->state != RAFT_STATE_LEADER)
+	if (!raft->is_candidate)
 		return;
 	raft->is_candidate = false;
-	if (raft->state != RAFT_STATE_LEADER) {
-		/* Do not wait for anything while being a voter. */
-		raft_ev_timer_stop(raft_loop(), &raft->timer);
-	}
-	if (raft->state != RAFT_STATE_FOLLOWER) {
-		if (raft->state == RAFT_STATE_LEADER) {
-			if (!do_demote) {
-				/*
-				 * Remain leader until someone
-				 * triggers new elections.
-				 */
-				return;
-			}
-			raft->leader = 0;
-		}
-		raft->state = RAFT_STATE_FOLLOWER;
-		/* State is visible and changed - broadcast. */
-		raft_schedule_broadcast(raft);
-	}
+	/* Do not wait for anything while not being a candidate. */
+	raft_ev_timer_stop(raft_loop(), &raft->timer);
+	if (raft->state == RAFT_STATE_LEADER)
+		raft->leader = 0;
+	raft->state = RAFT_STATE_FOLLOWER;
+	raft_schedule_broadcast(raft);
+}
+
+static inline void
+raft_set_candidate(struct raft *raft, bool is_candidate)
+{
+	if (is_candidate)
+		raft_start_candidate(raft);
+	else
+		raft_stop_candidate(raft);
+}
+
+void
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
+{
+	raft->is_cfg_candidate = is_candidate;
+	raft_restore(raft);
+}
+
+void
+raft_promote(struct raft *raft)
+{
+	if (!raft->is_enabled)
+		return;
+	raft_sm_schedule_new_term(raft, raft->volatile_term + 1);
+	raft_start_candidate(raft);
+}
+
+void
+raft_restore(struct raft *raft)
+{
+	raft_set_candidate(raft, raft->is_cfg_candidate && raft->is_enabled);
 }
 
 void
diff --git a/src/lib/raft/raft.h b/src/lib/raft/raft.h
index 223e0509f..53e2c58a8 100644
--- a/src/lib/raft/raft.h
+++ b/src/lib/raft/raft.h
@@ -275,17 +275,20 @@ void
 raft_cfg_is_candidate(struct raft *raft, bool is_candidate);
 
 /**
- * Make the instance a candidate.
+ * Bump the term and become a candidate for it regardless of the config. In case
+ * of another term bump the node's role is restored according to its config
+ * automatically.
  */
 void
-raft_start_candidate(struct raft *raft);
+raft_promote(struct raft *raft);
 
 /**
- * Make the instance stop taking part in new elections.
- * @param do_demote whether to stop being a leader immediately or not.
+ * Restore the instance role according to its config. In particular, if it was
+ * promoted and elected in the current term despite its config, restoration
+ * makes it a follower.
  */
 void
-raft_stop_candidate(struct raft *raft, bool do_demote);
+raft_restore(struct raft *raft);
 
 /** Configure Raft leader election timeout. */
 void
diff --git a/test/box/error.result b/test/box/error.result
index 062a90399..25def757e 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -442,7 +442,7 @@ t;
  |   221: box.error.SQL_CANT_ADD_AUTOINC
  |   222: box.error.QUORUM_WAIT
  |   223: box.error.INTERFERING_PROMOTE
- |   224: box.error.RAFT_DISABLED
+ |   224: box.error.ELECTION_DISABLED
  |   225: box.error.TXN_ROLLBACK
  | ...
 
diff --git a/test/unit/raft.c b/test/unit/raft.c
index b9bc49b78..de7ada159 100644
--- a/test/unit/raft.c
+++ b/test/unit/raft.c
@@ -1354,38 +1354,31 @@ raft_test_too_long_wal_write(void)
 }
 
 static void
-raft_test_start_stop_candidate(void)
+raft_test_promote_restore(void)
 {
-	raft_start_test(8);
+	raft_start_test(12);
 	struct raft_node node;
 	raft_node_create(&node);
 
 	raft_node_cfg_is_candidate(&node, false);
 	raft_node_cfg_election_quorum(&node, 1);
 
-	raft_node_start_candidate(&node);
+	raft_node_promote(&node);
 	raft_run_next_event();
-	is(node.raft.state, RAFT_STATE_LEADER, "became leader after "
-	   "start candidate");
+	is(node.raft.state, RAFT_STATE_LEADER, "became leader after promotion");
 
-	raft_node_stop_candidate(&node);
-	raft_run_for(node.cfg_death_timeout);
-	is(node.raft.state, RAFT_STATE_LEADER, "remain leader after "
-	   "stop candidate");
-
-	raft_node_demote_candidate(&node);
-	is(node.raft.state, RAFT_STATE_FOLLOWER, "demote drops a non-candidate "
-	   "leader to a follower");
+	raft_node_restore(&node);
+	is(node.raft.state, RAFT_STATE_FOLLOWER, "restore drops a "
+	   "non-candidate leader to a follower");
 
 	/*
 	 * Ensure the non-candidate leader is demoted when sees a new term, and
 	 * does not try election again.
 	 */
-	raft_node_start_candidate(&node);
+	raft_node_promote(&node);
 	raft_run_next_event();
-	raft_node_stop_candidate(&node);
-	is(node.raft.state, RAFT_STATE_LEADER, "non-candidate but still "
-	   "leader");
+	is(node.raft.state, RAFT_STATE_LEADER, "became leader after promotion");
+	ok(node.raft.is_candidate, "is a candidate");
 
 	is(raft_node_send_vote_request(&node,
 		4 /* Term. */,
@@ -1394,11 +1387,23 @@ raft_test_start_stop_candidate(void)
 	), 0, "vote request from 2");
 	is(node.raft.state, RAFT_STATE_FOLLOWER, "demote once new election "
 	   "starts");
+	ok(!node.raft.is_candidate, "is not a candidate after term bump");
 
 	raft_run_for(node.cfg_election_timeout * 2);
 	is(node.raft.state, RAFT_STATE_FOLLOWER, "still follower");
 	is(node.raft.term, 4, "still the same term");
 
+	/* Promote does not do anything on a disabled node. */
+	raft_node_cfg_is_candidate(&node, true);
+	raft_node_cfg_is_enabled(&node, false);
+	raft_node_promote(&node);
+	is(node.raft.term, 4, "still old term");
+	ok(!node.raft.is_candidate, "not a candidate");
+
+	/* Restore takes into account if Raft is enabled. */
+	raft_node_restore(&node);
+	ok(!node.raft.is_candidate, "not a candidate");
+
 	raft_node_destroy(&node);
 	raft_finish_test();
 }
@@ -1424,7 +1429,7 @@ main_f(va_list ap)
 	raft_test_death_timeout();
 	raft_test_enable_disable();
 	raft_test_too_long_wal_write();
-	raft_test_start_stop_candidate();
+	raft_test_promote_restore();
 
 	fakeev_free();
 
diff --git a/test/unit/raft.result b/test/unit/raft.result
index f89cd1658..d2a9ac882 100644
--- a/test/unit/raft.result
+++ b/test/unit/raft.result
@@ -230,16 +230,20 @@ ok 12 - subtests
     ok 8 - became candidate
 ok 13 - subtests
 	*** raft_test_too_long_wal_write: done ***
-	*** raft_test_start_stop_candidate ***
-    1..8
-    ok 1 - became leader after start candidate
-    ok 2 - remain leader after stop candidate
-    ok 3 - demote drops a non-candidate leader to a follower
-    ok 4 - non-candidate but still leader
+	*** raft_test_promote_restore ***
+    1..12
+    ok 1 - became leader after promotion
+    ok 2 - restore drops a non-candidate leader to a follower
+    ok 3 - became leader after promotion
+    ok 4 - is a candidate
     ok 5 - vote request from 2
     ok 6 - demote once new election starts
-    ok 7 - still follower
-    ok 8 - still the same term
+    ok 7 - is not a candidate after term bump
+    ok 8 - still follower
+    ok 9 - still the same term
+    ok 10 - still old term
+    ok 11 - not a candidate
+    ok 12 - not a candidate
 ok 14 - subtests
-	*** raft_test_start_stop_candidate: done ***
+	*** raft_test_promote_restore: done ***
 	*** main_f: done ***
diff --git a/test/unit/raft_test_utils.c b/test/unit/raft_test_utils.c
index 452c05c81..b5754cf78 100644
--- a/test/unit/raft_test_utils.c
+++ b/test/unit/raft_test_utils.c
@@ -388,24 +388,19 @@ raft_node_unblock(struct raft_node *node)
 }
 
 void
-raft_node_start_candidate(struct raft_node *node)
+raft_node_promote(struct raft_node *node)
 {
 	assert(raft_node_is_started(node));
-	raft_start_candidate(&node->raft);
-}
-
-void
-raft_node_stop_candidate(struct raft_node *node)
-{
-	assert(raft_node_is_started(node));
-	raft_stop_candidate(&node->raft, false);
+	raft_promote(&node->raft);
+	raft_run_async_work();
 }
 
 void
-raft_node_demote_candidate(struct raft_node *node)
+raft_node_restore(struct raft_node *node)
 {
 	assert(raft_node_is_started(node));
-	raft_stop_candidate(&node->raft, true);
+	raft_restore(&node->raft);
+	raft_run_async_work();
 }
 
 void
diff --git a/test/unit/raft_test_utils.h b/test/unit/raft_test_utils.h
index 5f8618716..c68dc3b22 100644
--- a/test/unit/raft_test_utils.h
+++ b/test/unit/raft_test_utils.h
@@ -208,22 +208,16 @@ raft_node_block(struct raft_node *node);
 void
 raft_node_unblock(struct raft_node *node);
 
-/**
- * Make the node candidate, and maybe start election if a leader is not known.
- */
+/** Promote the node. It bumps the term and tries to become a leader. */
 void
-raft_node_start_candidate(struct raft_node *node);
+raft_node_promote(struct raft_node *node);
 
 /**
- * Make the node non-candidate for next elections, but if it is a leader right
- * now, it will stay a leader.
+ * Restore the node's state back to its is_candidate config. It is demoted if
+ * was a leader.
  */
 void
-raft_node_stop_candidate(struct raft_node *node);
-
-/** Stop the candidate and remove its leader role if present. */
-void
-raft_node_demote_candidate(struct raft_node *node);
+raft_node_restore(struct raft_node *node);
 
 /** Configuration methods. */
 
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list