[Tarantool-patches] [PATCH v4 10/12] election: support manual elections in clear_synchro_queue()

Serge Petrenko sergepetrenko at tarantool.org
Sun Apr 18 12:26:41 MSK 2021



17.04.2021 01:24, Vladislav Shpilevoy пишет:
> Thanks for the patch!
>
> See 1 comment below.
>
>> diff --git a/src/box/box.cc b/src/box/box.cc
>> index d5a55a30a..fcd812c09 100644
>> --- a/src/box/box.cc
>> +++ b/src/box/box.cc
>> @@ -1521,12 +1521,75 @@ box_clear_synchro_queue(bool try_wait)
>>   	if (!is_box_configured ||
>>   	    raft_node_term(box_raft(), instance_id) == box_raft()->term)
>>   		return 0;
>> +
>> +	bool run_elections = false;
>> +
>> +	switch (box_election_mode) {
>> +	case ELECTION_MODE_OFF:
>> +		break;
>> +	case ELECTION_MODE_VOTER:
>> +		assert(box_raft()->state == RAFT_STATE_FOLLOWER);
>> +		diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
>> +			 "manual elections");
>> +		return -1;
>> +	case ELECTION_MODE_MANUAL:
>> +		assert(box_raft()->state == RAFT_STATE_FOLLOWER);
>> +		run_elections = true;
>> +		try_wait = false;
>> +		break;
>> +	case ELECTION_MODE_CANDIDATE:
>> +		/*
>> +		 * Leader elections are enabled, and this instance is allowed to
>> +		 * promote only if it's already an elected leader. No manual
>> +		 * elections.
>> +		 */
>> +		if (box_raft()->state != RAFT_STATE_LEADER) {
>> +			diag_set(ClientError, ER_UNSUPPORTED, "election_mode="
>> +				 "'candidate'", "manual elections");
>> +			return -1;
>> +		}
>> +		break;
>> +	default:
>> +		unreachable();
>> +	}
>> +
>>   	uint32_t former_leader_id = txn_limbo.owner_id;
>>   	int64_t wait_lsn = txn_limbo.confirmed_lsn;
>>   	int rc = 0;
>>   	int quorum = replication_synchro_quorum;
>>   	in_clear_synchro_queue = true;
>>   
>> +	if (run_elections) {
>> +		/*
>> +		 * Make this instance a candidate and run until some leader, not
>> +		 * necessarily this instance, emerges.
>> +		 */
>> +		raft_start_candidate(box_raft());
>> +		/*
>> +		 * Trigger new elections without waiting for an old leader to
>> +		 * disappear.
>> +		 */
>> +		raft_new_term(box_raft());
>> +		box_raft_wait_leader_found();
> Shouldn't we wait for election_timeout?

I think not. Let's wait for however long it takes to elect a leader.
Several terms may pass before the leader is finally elected.

I mean, IMO it would be simpler for the user to do:

```
box.ctl.promote()
    -- term1, split vote
    -- term2, split vote
    -- term3, leader found
-- success
```
rather than
```
box.ctl.promote()
-- error, split vote

box.ctl.promote()
-- error, split vote

box.ctl.promote()
-- success
```
>
> Also what if the fiber is canceled before the leader is found? It
> seems box_raft_wait_leader_found() would fail on an assertion because
> raft is still enabled, but leader_id is nil.

Thanks for noticing! Will fix.

Diff:

==================================

diff --git a/src/box/box.cc b/src/box/box.cc
index 962f649c3..797aa86b5 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1572,13 +1572,17 @@ box_clear_synchro_queue(bool try_wait)
                  * disappear.
                  */
                 raft_new_term(box_raft());
-               box_raft_wait_leader_found();
+               rc = box_raft_wait_leader_found();
                 /*
                  * Do not reset raft mode if it was changed while running the
                  * elections.
                  */
                 if (box_election_mode == ELECTION_MODE_MANUAL)
                         raft_stop_candidate(box_raft(), false);
+               if (rc != 0) {
+                       in_clear_synchro_queue = false;
+                       return -1;
+               }
                 if (!box_raft()->is_enabled) {
                         diag_set(ClientError, ER_RAFT_DISABLED);
                         in_clear_synchro_queue = false;
diff --git a/src/box/raft.c b/src/box/raft.c
index 425353207..61fa9f91b 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -347,15 +347,20 @@ box_raft_wait_leader_found_f(struct trigger *trig, void *event)
         return 0;
  }

-void
+int
  box_raft_wait_leader_found(void)
  {
         struct trigger trig;
         trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
         raft_on_update(box_raft(), &trig);
         fiber_yield();
-       assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
         trigger_clear(&trig);
+       if (fiber_is_cancelled()) {
+               diag_set(FiberIsCancelled);
+               return -1;
+       }
+       assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
+       return 0;
  }

  void
diff --git a/src/box/raft.h b/src/box/raft.h
index 8fce423e1..6b6136510 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -97,7 +97,8 @@ box_raft_checkpoint_remote(struct raft_request *req);
  int
  box_raft_process(struct raft_request *req, uint32_t source);

-void
+/** Block this fiber until Raft leader is known. */
+int
  box_raft_wait_leader_found();

  void

>
>> +		/*
>> +		 * Do not reset raft mode if it was changed while running the
>> +		 * elections.
>> +		 */
>> +		if (box_election_mode == ELECTION_MODE_MANUAL)
>> +			raft_stop_candidate(box_raft(), false);
>> +		if (!box_raft()->is_enabled) {
>> +			diag_set(ClientError, ER_RAFT_DISABLED);
>> +			in_clear_synchro_queue = false;
>> +			return -1;
>> +		}
>> +		if (box_raft()->state != RAFT_STATE_LEADER) {
>> +			diag_set(ClientError, ER_INTERFERING_PROMOTE,
>> +				 box_raft()->leader);
>> +			in_clear_synchro_queue = false;
>> +			return -1;
>> +		}
>> +	}
>> +
>>   	if (txn_limbo_is_empty(&txn_limbo))
>>   		goto promote;
>>   

-- 
Serge Petrenko



More information about the Tarantool-patches mailing list