[Tarantool-patches] [PATCH v3 08/10] Support manual elections in `box.ctl.clear_synchro_queue()`

Serge Petrenko sergepetrenko at tarantool.org
Fri Apr 16 18:38:00 MSK 2021



16.04.2021 02:30, Vladislav Shpilevoy пишет:
> Thanks for working on this!
>
> See 5 comments below.
>
>> diff --git a/src/box/box.cc b/src/box/box.cc
>> index 3729ed997..6c7c8968a 100644
>> --- a/src/box/box.cc
>> +++ b/src/box/box.cc
>> @@ -1525,12 +1526,74 @@ box_clear_synchro_queue(bool try_wait)
>>   	if (!is_box_configured ||
>>   	    raft_source_term(box_raft(), instance_id) == box_raft()->term)
>>   		return 0;
>> +
>> +	bool run_elections = false;
>> +
>> +	switch (box_election_mode) {
>> +	case ELECTION_MODE_OFF:
>> +		break;
>> +	case ELECTION_MODE_VOTER:
>> +		assert(box_raft()->state == RAFT_STATE_FOLLOWER);
>> +		diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
>> +			 "manual elections");
>> +		return -1;
>> +	case ELECTION_MODE_MANUAL:
>> +		assert(box_raft()->state != RAFT_STATE_CANDIDATE);
>> +		if (box_raft()->state == RAFT_STATE_LEADER) {
>> +			diag_set(ClientError, ER_ALREADY_LEADER);
>> +			return -1;
>> +		}
>> +		run_elections = true;
>> +		try_wait = false;
>> +		break;
>> +	case ELECTION_MODE_CANDIDATE:
>> +		/*
>> +		 * Leader elections are enabled, and this instance is allowed to
>> +		 * promote only if it's already an elected leader. No manual
>> +		 * elections.
>> +		 */
>> +		if (box_raft()->state != RAFT_STATE_LEADER) {
> 1. That is strange. Why do you allow to promote the node
> if it is already the leader when mode is candidate, but do
> not allow the same when the mode is manual?

It's because box_clear_synchro_queue() may be called by raft worker,
once the node becomes leader. So I cannot forbid this.

Actually, there's a guard for manual `box.ctl.clear_synchro_queue()` above,
I just didn't make proper use of it. I mean these lines:

  	if (!is_box_configured ||
  	    raft_source_term(box_raft(), instance_id) == box_raft()->term)
  		return 0;

So I don't need the ER_ALREADY_LEADER in ELECTION_MODE_MANUAL case.

Will fix.
Thanks for pointing this out!

>
> Shouldn't we throw an error when the mode is candidate
> regardless of the node role?
>
>> +			diag_set(ClientError, ER_UNSUPPORTED, "election_mode="
>> +				 "'candidate'", "manual elections");
>> +			return -1;
>> +		}
>> +		break;
>> +	default:
>> +		unreachable();
>> +	}
>> +
>>   	uint32_t former_leader_id = txn_limbo.owner_id;
>>   	int64_t wait_lsn = txn_limbo.confirmed_lsn;
>>   	int rc = 0;
>>   	int quorum = replication_synchro_quorum;
>>   	in_clear_synchro_queue = true;
>>   
>> +	if (run_elections) {
>> +		/*
>> +		 * Make this instance a candidate and run until some leader, not
>> +		 * necessarily this instance, emerges.
>> +		 */
>> +		raft_cfg_is_candidate(box_raft(), true, false);
>> +		/*
>> +		 * Trigger new elections without waiting for an old leader to
>> +		 * disappear.
>> +		 */
>> +		raft_new_term(box_raft());
>> +		box_raft_wait_leader_found();
>> +		raft_cfg_is_candidate(box_raft(), false, false);
> 2. What if during box_raft_wait_leader_found() I made the node candidate
> via box.cfg? Won't you then reset it back to non-candidate here?
Yes, that's true.
>
> It probably should reset the current box.cfg mode back. Not just
> remove the candidate flag.

I think it's strange to reconfigure box automatically.
I suggest to reset node to non-candidate only if the mode
remains MANUAL after the election.
>
>> +		if (!box_raft()->is_enabled) {
>> +			diag_set(ClientError, ER_RAFT_DISABLED);
>> +			in_clear_synchro_queue = false;
>> +			return -1;
>> +		}
>> +		if (box_raft()->state != RAFT_STATE_LEADER) {
>> +			diag_set(ClientError, ER_INTERFERING_PROMOTE,
>> +				 box_raft()->leader);
>> +			in_clear_synchro_queue = false;
>> +			return -1;
>> +		}
>> +	}
>> +
>>   	if (txn_limbo_is_empty(&txn_limbo))
>>   		goto promote;
>>   
>> diff --git a/src/box/raft.c b/src/box/raft.c
>> index 285dbe4fd..c7dc79f9b 100644
>> --- a/src/box/raft.c
>> +++ b/src/box/raft.c
>> @@ -336,6 +336,28 @@ fail:
>>   	panic("Could not write a raft request to WAL\n");
>>   }
>>   
>> +static int
>> +box_raft_wait_leader_found_trig(struct trigger *trig, void *event)
> 3. I thought we usually call triggers with _f suffix, not _trig.

Sure. Fixed.
>
>> +{
>> +	struct raft *raft = (struct raft *)event;
>> +	assert(raft == box_raft());
>> +	struct fiber *waiter = (struct fiber *)trig->data;
> 4. No need to cast this and event - void * cast works naturally in C.

Ok.

>
>> +	if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
>> +		fiber_wakeup(waiter);
>> +	return 0;
>> +}
>> diff --git a/src/box/raft.h b/src/box/raft.h
>> index 15f4e80d9..8fce423e1 100644
>> --- a/src/box/raft.h
>> +++ b/src/box/raft.h
>> @@ -97,6 +97,9 @@ box_raft_checkpoint_remote(struct raft_request *req);
>>   int
>>   box_raft_process(struct raft_request *req, uint32_t source);
>>   
>> +void
>> +box_raft_wait_leader_found();
>> +
>>   void
>>   box_raft_init(void);
>>   
>> diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
>> index e9ce8cade..7b77e05ea 100644
>> --- a/src/lib/raft/raft.c
>> +++ b/src/lib/raft/raft.c
>> @@ -846,7 +846,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
>>   }
>>   
>>   void
>> -raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
>> +raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote)
> 5. I know it might lead to some code duplication, but probably
> better move that to other functions. For example,
>
> 	raft_cfg_is_temporary_candidate()
>
> or something like that. Otherwise it appears surprisingly hard
> to follow these 2 flags together. Although I might be wrong and
> it would look worse. Did you try?
>
> Or another option:
>
> 	raft_cfg_is_candidate(box_raft(), true, false);
> 	raft_cfg_is_candidate(box_raft(), false, false);
>
> turns into
>
> 	raft_start_candidate(box_raft())
> 	raft_stop_candidate(box_raft())
>
> Also it would be good to have unit tests for the changes in raft.h
> and raft.c.

This variant sounds good. I'll implement in in a new commit.

Incremental diff:

==========================================

diff --git a/src/box/box.cc b/src/box/box.cc
index 37938df15..fcd812c09 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1157,8 +1157,7 @@ box_set_election_mode(void)
      if (mode == ELECTION_MODE_INVALID)
          return -1;
      box_election_mode = mode;
-    raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE,
-                  true);
+    raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE);
      raft_cfg_is_enabled(box_raft(), mode != ELECTION_MODE_OFF);
      return 0;
  }
@@ -1534,11 +1533,7 @@ box_clear_synchro_queue(bool try_wait)
               "manual elections");
          return -1;
      case ELECTION_MODE_MANUAL:
-        assert(box_raft()->state != RAFT_STATE_CANDIDATE);
-        if (box_raft()->state == RAFT_STATE_LEADER) {
-            diag_set(ClientError, ER_ALREADY_LEADER);
-            return -1;
-        }
+        assert(box_raft()->state == RAFT_STATE_FOLLOWER);
          run_elections = true;
          try_wait = false;
          break;
@@ -1569,14 +1564,19 @@ box_clear_synchro_queue(bool try_wait)
           * Make this instance a candidate and run until some leader, not
           * necessarily this instance, emerges.
           */
-        raft_cfg_is_candidate(box_raft(), true, false);
+        raft_start_candidate(box_raft());
          /*
           * Trigger new elections without waiting for an old leader to
           * disappear.
           */
          raft_new_term(box_raft());
          box_raft_wait_leader_found();
-        raft_cfg_is_candidate(box_raft(), false, false);
+        /*
+         * Do not reset raft mode if it was changed while running the
+         * elections.
+         */
+        if (box_election_mode == ELECTION_MODE_MANUAL)
+            raft_stop_candidate(box_raft(), false);
          if (!box_raft()->is_enabled) {
              diag_set(ClientError, ER_RAFT_DISABLED);
              in_clear_synchro_queue = false;
diff --git a/src/box/errcode.h b/src/box/errcode.h
index df36085db..d93820e96 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -277,7 +277,6 @@ struct errcode_record {
      /*222 */_(ER_QUORUM_WAIT,        "Couldn't wait for quorum %d: %s") \
      /*223 */_(ER_INTERFERING_PROMOTE,    "Instance with replica id %u 
was promoted first") \
      /*224 */_(ER_RAFT_DISABLED,        "Elections were turned off 
while running box.ctl.promote()")\
-    /*225 */_(ER_ALREADY_LEADER,        "Can't promote an existing 
leader")\

  /*
   * !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/raft.c b/src/box/raft.c
index c7dc79f9b..425353207 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -337,11 +337,11 @@ fail:
  }

  static int
-box_raft_wait_leader_found_trig(struct trigger *trig, void *event)
+box_raft_wait_leader_found_f(struct trigger *trig, void *event)
  {
-    struct raft *raft = (struct raft *)event;
+    struct raft *raft = event;
      assert(raft == box_raft());
-    struct fiber *waiter = (struct fiber *)trig->data;
+    struct fiber *waiter = trig->data;
      if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
          fiber_wakeup(waiter);
      return 0;
@@ -351,7 +351,7 @@ void
  box_raft_wait_leader_found(void)
  {
      struct trigger trig;
-    trigger_create(&trig, box_raft_wait_leader_found_trig, fiber(), NULL);
+    trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
      raft_on_update(box_raft(), &trig);
      fiber_yield();
      assert(box_raft()->leader != REPLICA_ID_NIL || 
!box_raft()->is_enabled);
diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
index d557c907b..8deb06eb5 100644
--- a/src/lib/raft/raft.c
+++ b/src/lib/raft/raft.c
@@ -846,7 +846,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
  }

  void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote)
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
  {
      raft->is_cfg_candidate = is_candidate;
      is_candidate = is_candidate && raft->is_enabled;
diff --git a/src/lib/raft/raft.h b/src/lib/raft/raft.h
index b140ff3ba..69dec63c6 100644
--- a/src/lib/raft/raft.h
+++ b/src/lib/raft/raft.h
@@ -325,7 +325,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled);
   * the node still can vote, when Raft is enabled.
   */
  void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote);
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate);

  /**
   * Make the instance a candidate.
diff --git a/test/box/error.result b/test/box/error.result
index dad6a21d3..cc8cbaaa9 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -443,7 +443,6 @@ t;
   |   222: box.error.QUORUM_WAIT
   |   223: box.error.INTERFERING_PROMOTE
   |   224: box.error.RAFT_DISABLED
- |   225: box.error.ALREADY_LEADER
   | ...

  test_run:cmd("setopt delimiter ''");
diff --git a/test/unit/raft_test_utils.c b/test/unit/raft_test_utils.c
index a10ccae6a..b8735f373 100644
--- a/test/unit/raft_test_utils.c
+++ b/test/unit/raft_test_utils.c
@@ -360,7 +360,7 @@ raft_node_start(struct raft_node *node)
          raft_process_recovery(&node->raft, &node->journal.rows[i]);

      raft_cfg_is_enabled(&node->raft, node->cfg_is_enabled);
-    raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate, true);
+    raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate);
      raft_cfg_election_timeout(&node->raft, node->cfg_election_timeout);
      raft_cfg_election_quorum(&node->raft, node->cfg_election_quorum);
      raft_cfg_death_timeout(&node->raft, node->cfg_death_timeout);
@@ -402,7 +402,7 @@ raft_node_cfg_is_candidate(struct raft_node *node, 
bool value)
  {
      node->cfg_is_candidate = value;
      if (raft_node_is_started(node)) {
-        raft_cfg_is_candidate(&node->raft, value, true);
+        raft_cfg_is_candidate(&node->raft, value);
          raft_run_async_work();
      }
  }

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list