From: Serge Petrenko via Tarantool-patches <tarantool-patches@dev.tarantool.org>
To: v.shpilevoy@tarantool.org, gorcunov@gmail.com
Cc: tarantool-patches@dev.tarantool.org
Subject: [Tarantool-patches] [PATCH v2 8/9] Support manual elections in `box.ctl.clear_synchro_queue()`
Date: Mon, 12 Apr 2021 22:40:21 +0300	[thread overview]
Message-ID: <77f6f4be4b6c2c2b81f4fbe1e292062eca63361b.1618256019.git.sergepetrenko@tarantool.org> (raw)
In-Reply-To: <cover.1618256019.git.sergepetrenko@tarantool.org>
This patch adds support for manual elections from
`box.ctl.clear_synchro_queue()`. When an instance is in
`election_mode='manual'`, calling `clear_synchro_queue()` will make it
start a new election round.
Follow-up #5445
Part of #3055
@TarantoolBot document
Title: describe election_mode='manual'
Manual election mode is introduced. It may be used when the user wants to
control which instance is the leader explicitly instead of relying on
the Raft election algorithm.
When an instance is configured with `election_mode='manual'`, it behaves
as follows:
 1) By default, the instance acts like a voter: it is read-only and may
    vote for other instances that are candidates.
 2) Once `box.ctl.clear_synchro_queue()` is called, the instance becomes a
    candidate and starts a new election round. If the instance wins the
    elections, it remains the leader, but won't participate in any new elections.
---
 src/box/box.cc              | 74 +++++++++++++++++++++++++++++++++++--
 src/box/errcode.h           |  3 ++
 src/box/raft.c              | 25 ++++++++++++-
 src/box/raft.h              |  3 ++
 src/lib/raft/raft.c         | 12 +++++-
 src/lib/raft/raft.h         |  2 +-
 test/box/error.result       |  3 ++
 test/unit/raft_test_utils.c |  4 +-
 8 files changed, 116 insertions(+), 10 deletions(-)
diff --git a/src/box/box.cc b/src/box/box.cc
index b77b0a08d..dc7f434e4 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1161,7 +1161,8 @@ box_set_election_mode(void)
 	if (mode == ELECTION_MODE_INVALID)
 		return -1;
 	box_election_mode = mode;
-	raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE);
+	raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE,
+			      true);
 	raft_cfg_is_enabled(box_raft(), mode != ELECTION_MODE_OFF);
 	return 0;
 }
@@ -1525,12 +1526,77 @@ box_clear_synchro_queue(bool try_wait)
 	if (!is_box_configured ||
 	    raft_source_term(box_raft(), instance_id) == box_raft()->term)
 		return 0;
+
+	bool run_elections = false;
+
+	switch (box_election_mode) {
+	case ELECTION_MODE_OFF:
+		break;
+	case ELECTION_MODE_VOTER:
+		diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
+			 "manual elections");
+		return -1;
+	case ELECTION_MODE_MANUAL:
+		/*
+		 * An instance that is already a leader has nothing to promote:
+		 * report an error instead of bumping the term once again.
+		 */
+		assert(box_raft()->state != RAFT_STATE_CANDIDATE);
+		if (box_raft()->state == RAFT_STATE_LEADER) {
+			diag_set(ClientError, ER_ALREADY_LEADER);
+			return -1;
+		}
+		run_elections = true;
+		try_wait = false;
+		break;
+	case ELECTION_MODE_CANDIDATE:
+		/*
+		 * Leader elections are enabled, and this instance is allowed to
+		 * promote only if it's already an elected leader. No manual
+		 * elections.
+		 */
+		if (box_raft()->state != RAFT_STATE_LEADER) {
+			diag_set(ClientError, ER_UNSUPPORTED, "election_mode="
+				 "'candidate'", "manual elections");
+			return -1;
+		}
+		break;
+	default:
+		unreachable();
+	}
+
 	uint32_t former_leader_id = txn_limbo.owner_id;
 	int64_t wait_lsn = txn_limbo.confirmed_lsn;
 	int rc = 0;
 	int quorum = replication_synchro_quorum;
 	in_clear_synchro_queue = true;
 
+	if (run_elections) {
+		/*
+		 * Make this instance a candidate and run until some leader, not
+		 * necessarily this instance, emerges.
+		 */
+		raft_cfg_is_candidate(box_raft(), true, false);
+		/*
+		 * Trigger new elections without waiting for an old leader to
+		 * disappear.
+		 */
+		raft_new_term(box_raft());
+		box_raft_wait_leader_found();
+		raft_cfg_is_candidate(box_raft(), false, false);
+		if (!box_raft()->is_enabled) {
+			diag_set(ClientError, ER_RAFT_DISABLED);
+			in_clear_synchro_queue = false;
+			return -1;
+		}
+		if (box_raft()->state != RAFT_STATE_LEADER) {
+			diag_set(ClientError, ER_INTERFERING_PROMOTE,
+				 box_raft()->leader);
+			in_clear_synchro_queue = false;
+			return -1;
+		}
+	}
+
 	if (txn_limbo_is_empty(&txn_limbo))
 		goto promote;
 
@@ -1548,10 +1614,10 @@ box_clear_synchro_queue(bool try_wait)
 		 * transactions. Exit in case someone did that for us.
 		 */
 		if (former_leader_id != txn_limbo.owner_id) {
-			//TODO: error once we see someone else became the leader
-			// already.
+			diag_set(ClientError, ER_INTERFERING_PROMOTE,
+				 txn_limbo.owner_id);
 			in_clear_synchro_queue = false;
-			return 0;
+			return -1;
 		}
 	}
 
diff --git a/src/box/errcode.h b/src/box/errcode.h
index 56573688e..e5c9f3b09 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -275,6 +275,9 @@ struct errcode_record {
 	/*220 */_(ER_TOO_EARLY_SUBSCRIBE,	"Can't subscribe non-anonymous replica %s until join is done") \
 	/*221 */_(ER_SQL_CANT_ADD_AUTOINC,	"Can't add AUTOINCREMENT: space %s can't feature more than one AUTOINCREMENT field") \
 	/*222 */_(ER_QUORUM_WAIT,		"Couldn't wait for quorum %d: %s") \
+	/*223 */_(ER_INTERFERING_PROMOTE,	"Instance with replica id %u was promoted first") \
+	/*224 */_(ER_RAFT_DISABLED,		"Elections were turned off while running box.ctl.promote()")\
+	/*225 */_(ER_ALREADY_LEADER,		"Can't promote an existing leader")\
 
 /*
  * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/raft.c b/src/box/raft.c
index 285dbe4fd..47d4fd56d 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -95,7 +95,8 @@ box_raft_update_synchro_queue(struct raft *raft)
 	 * manually. In this case the call below will exit immediately and we'll
 	 * simply log a warning.
 	 */
-	if (raft->state == RAFT_STATE_LEADER) {
+	if (raft->state == RAFT_STATE_LEADER &&
+	    box_election_mode != ELECTION_MODE_MANUAL) {
 		int rc = 0;
 		uint32_t errcode = 0;
 		do {
@@ -336,6 +337,28 @@ fail:
 	panic("Could not write a raft request to WAL\n");
 }
 
+static int
+box_raft_wait_leader_found_trig(struct trigger *trig, void *event)
+{
+	struct raft *raft = (struct raft *)event;
+	assert(raft == box_raft());
+	struct fiber *waiter = (struct fiber *)trig->data;
+	if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
+		fiber_wakeup(waiter);
+	return 0;
+}
+
+void
+box_raft_wait_leader_found(void)
+{
+	struct trigger trig;
+	trigger_create(&trig, box_raft_wait_leader_found_trig, fiber(), NULL);
+	raft_on_update(box_raft(), &trig);
+	fiber_yield();
+	assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
+	trigger_clear(&trig);
+}
+
 void
 box_raft_init(void)
 {
diff --git a/src/box/raft.h b/src/box/raft.h
index 15f4e80d9..8fce423e1 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -97,6 +97,9 @@ box_raft_checkpoint_remote(struct raft_request *req);
 int
 box_raft_process(struct raft_request *req, uint32_t source);
 
+void
+box_raft_wait_leader_found();
+
 void
 box_raft_init(void);
 
diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
index e9ce8cade..7b77e05ea 100644
--- a/src/lib/raft/raft.c
+++ b/src/lib/raft/raft.c
@@ -846,7 +846,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
 }
 
 void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote)
 {
 	bool old_is_candidate = raft->is_candidate;
 	raft->is_cfg_candidate = is_candidate;
@@ -874,8 +874,16 @@ raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
 			raft_ev_timer_stop(raft_loop(), &raft->timer);
 		}
 		if (raft->state != RAFT_STATE_FOLLOWER) {
-			if (raft->state == RAFT_STATE_LEADER)
+			if (raft->state == RAFT_STATE_LEADER) {
+				if (!demote) {
+					/*
+					 * Remain leader until someone
+					 * triggers new elections.
+					 */
+					return;
+				}
 				raft->leader = 0;
+			}
 			raft->state = RAFT_STATE_FOLLOWER;
 			/* State is visible and changed - broadcast. */
 			raft_schedule_broadcast(raft);
diff --git a/src/lib/raft/raft.h b/src/lib/raft/raft.h
index 40c8630e9..3526460af 100644
--- a/src/lib/raft/raft.h
+++ b/src/lib/raft/raft.h
@@ -325,7 +325,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled);
  * the node still can vote, when Raft is enabled.
  */
 void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate);
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote);
 
 /** Configure Raft leader election timeout. */
 void
diff --git a/test/box/error.result b/test/box/error.result
index 7761c6949..dad6a21d3 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -441,6 +441,9 @@ t;
  |   220: box.error.TOO_EARLY_SUBSCRIBE
  |   221: box.error.SQL_CANT_ADD_AUTOINC
  |   222: box.error.QUORUM_WAIT
+ |   223: box.error.INTERFERING_PROMOTE
+ |   224: box.error.RAFT_DISABLED
+ |   225: box.error.ALREADY_LEADER
  | ...
 
 test_run:cmd("setopt delimiter ''");
diff --git a/test/unit/raft_test_utils.c b/test/unit/raft_test_utils.c
index b8735f373..a10ccae6a 100644
--- a/test/unit/raft_test_utils.c
+++ b/test/unit/raft_test_utils.c
@@ -360,7 +360,7 @@ raft_node_start(struct raft_node *node)
 		raft_process_recovery(&node->raft, &node->journal.rows[i]);
 
 	raft_cfg_is_enabled(&node->raft, node->cfg_is_enabled);
-	raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate);
+	raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate, true);
 	raft_cfg_election_timeout(&node->raft, node->cfg_election_timeout);
 	raft_cfg_election_quorum(&node->raft, node->cfg_election_quorum);
 	raft_cfg_death_timeout(&node->raft, node->cfg_death_timeout);
@@ -402,7 +402,7 @@ raft_node_cfg_is_candidate(struct raft_node *node, bool value)
 {
 	node->cfg_is_candidate = value;
 	if (raft_node_is_started(node)) {
-		raft_cfg_is_candidate(&node->raft, value);
+		raft_cfg_is_candidate(&node->raft, value, true);
 		raft_run_async_work();
 	}
 }
-- 
2.24.3 (Apple Git-128)
next prev parent reply	other threads:[~2021-04-12 19:44 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-12 19:40 [Tarantool-patches] [PATCH v2 0/9] raft: introduce manual elections and fix a bug with re-applying rolled back transactions Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 1/9] wal: enrich row's meta information with sync replication flags Serge Petrenko via Tarantool-patches
2021-04-13 11:50   ` Cyrill Gorcunov via Tarantool-patches
2021-04-13 13:51     ` Serge Petrenko via Tarantool-patches
2021-04-13 14:16       ` Cyrill Gorcunov via Tarantool-patches
2021-04-13 13:09   ` Cyrill Gorcunov via Tarantool-patches
2021-04-13 13:29     ` Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 2/9] xrow: introduce a PROMOTE entry Serge Petrenko via Tarantool-patches
2021-04-13 14:15   ` Cyrill Gorcunov via Tarantool-patches
2021-04-14  9:12     ` Serge Petrenko via Tarantool-patches
2021-04-14 10:00       ` Cyrill Gorcunov via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 3/9] box: actualise iproto_key_type array Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 4/9] box: make clear_synchro_queue() write a PROMOTE entry instead of CONFIRM + ROLLBACK Serge Petrenko via Tarantool-patches
2021-04-13 14:33   ` Cyrill Gorcunov via Tarantool-patches
2021-04-14  8:23     ` Serge Petrenko via Tarantool-patches
2021-04-14  8:34       ` Cyrill Gorcunov via Tarantool-patches
2021-04-14  9:12     ` Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 5/9] box: write PROMOTE even for empty limbo Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 6/9] raft: keep track of greatest known term and filter replication sources based on that Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 7/9] replication: introduce a new election mode: "manual" Serge Petrenko via Tarantool-patches
2021-04-12 19:40 ` Serge Petrenko via Tarantool-patches [this message]
2021-04-12 19:40 ` [Tarantool-patches] [PATCH v2 9/9] box.ctl: rename clear_synchro_queue to promote Serge Petrenko via Tarantool-patches
2021-04-13 14:42 ` [Tarantool-patches] [PATCH v2 0/9] raft: introduce manual elections and fix a bug with re-applying rolled back transactions Cyrill Gorcunov via Tarantool-patches
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox
  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):
  git send-email \
    --in-reply-to=77f6f4be4b6c2c2b81f4fbe1e292062eca63361b.1618256019.git.sergepetrenko@tarantool.org \
    --to=tarantool-patches@dev.tarantool.org \
    --cc=gorcunov@gmail.com \
    --cc=sergepetrenko@tarantool.org \
    --cc=v.shpilevoy@tarantool.org \
    --subject='Re: [Tarantool-patches] [PATCH v2 8/9] Support manual elections in `box.ctl.clear_synchro_queue()`' \
    /path/to/YOUR_REPLY
  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox