[Tarantool-patches] [PATCH v4 10/12] election: support manual elections in clear_synchro_queue()

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Sat Apr 17 01:24:27 MSK 2021


Thanks for the patch!

See 1 comment below.

> diff --git a/src/box/box.cc b/src/box/box.cc
> index d5a55a30a..fcd812c09 100644
> --- a/src/box/box.cc
> +++ b/src/box/box.cc
> @@ -1521,12 +1521,75 @@ box_clear_synchro_queue(bool try_wait)
>  	if (!is_box_configured ||
>  	    raft_node_term(box_raft(), instance_id) == box_raft()->term)
>  		return 0;
> +
> +	bool run_elections = false;
> +
> +	switch (box_election_mode) {
> +	case ELECTION_MODE_OFF:
> +		break;
> +	case ELECTION_MODE_VOTER:
> +		assert(box_raft()->state == RAFT_STATE_FOLLOWER);
> +		diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
> +			 "manual elections");
> +		return -1;
> +	case ELECTION_MODE_MANUAL:
> +		assert(box_raft()->state == RAFT_STATE_FOLLOWER);
> +		run_elections = true;
> +		try_wait = false;
> +		break;
> +	case ELECTION_MODE_CANDIDATE:
> +		/*
> +		 * Leader elections are enabled, and this instance is allowed to
> +		 * promote only if it's already an elected leader. No manual
> +		 * elections.
> +		 */
> +		if (box_raft()->state != RAFT_STATE_LEADER) {
> +			diag_set(ClientError, ER_UNSUPPORTED, "election_mode="
> +				 "'candidate'", "manual elections");
> +			return -1;
> +		}
> +		break;
> +	default:
> +		unreachable();
> +	}
> +
>  	uint32_t former_leader_id = txn_limbo.owner_id;
>  	int64_t wait_lsn = txn_limbo.confirmed_lsn;
>  	int rc = 0;
>  	int quorum = replication_synchro_quorum;
>  	in_clear_synchro_queue = true;
>  
> +	if (run_elections) {
> +		/*
> +		 * Make this instance a candidate and run until some leader, not
> +		 * necessarily this instance, emerges.
> +		 */
> +		raft_start_candidate(box_raft());
> +		/*
> +		 * Trigger new elections without waiting for an old leader to
> +		 * disappear.
> +		 */
> +		raft_new_term(box_raft());
> +		box_raft_wait_leader_found();

Shouldn't we wait for election_timeout?

Also, what if the fiber is canceled before the leader is found? It
seems box_raft_wait_leader_found() would fail on an assertion, because
raft is still enabled but leader_id is nil.

> +		/*
> +		 * Do not reset raft mode if it was changed while running the
> +		 * elections.
> +		 */
> +		if (box_election_mode == ELECTION_MODE_MANUAL)
> +			raft_stop_candidate(box_raft(), false);
> +		if (!box_raft()->is_enabled) {
> +			diag_set(ClientError, ER_RAFT_DISABLED);
> +			in_clear_synchro_queue = false;
> +			return -1;
> +		}
> +		if (box_raft()->state != RAFT_STATE_LEADER) {
> +			diag_set(ClientError, ER_INTERFERING_PROMOTE,
> +				 box_raft()->leader);
> +			in_clear_synchro_queue = false;
> +			return -1;
> +		}
> +	}
> +
>  	if (txn_limbo_is_empty(&txn_limbo))
>  		goto promote;
>  


More information about the Tarantool-patches mailing list