[Tarantool-patches] [PATCH v3 08/10] Support manual elections in `box.ctl.clear_synchro_queue()`
Serge Petrenko
sergepetrenko at tarantool.org
Fri Apr 16 18:38:00 MSK 2021
16.04.2021 02:30, Vladislav Shpilevoy пишет:
> Thanks for working on this!
>
> See 5 comments below.
>
>> diff --git a/src/box/box.cc b/src/box/box.cc
>> index 3729ed997..6c7c8968a 100644
>> --- a/src/box/box.cc
>> +++ b/src/box/box.cc
>> @@ -1525,12 +1526,74 @@ box_clear_synchro_queue(bool try_wait)
>> if (!is_box_configured ||
>> raft_source_term(box_raft(), instance_id) == box_raft()->term)
>> return 0;
>> +
>> + bool run_elections = false;
>> +
>> + switch (box_election_mode) {
>> + case ELECTION_MODE_OFF:
>> + break;
>> + case ELECTION_MODE_VOTER:
>> + assert(box_raft()->state == RAFT_STATE_FOLLOWER);
>> + diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
>> + "manual elections");
>> + return -1;
>> + case ELECTION_MODE_MANUAL:
>> + assert(box_raft()->state != RAFT_STATE_CANDIDATE);
>> + if (box_raft()->state == RAFT_STATE_LEADER) {
>> + diag_set(ClientError, ER_ALREADY_LEADER);
>> + return -1;
>> + }
>> + run_elections = true;
>> + try_wait = false;
>> + break;
>> + case ELECTION_MODE_CANDIDATE:
>> + /*
>> + * Leader elections are enabled, and this instance is allowed to
>> + * promote only if it's already an elected leader. No manual
>> + * elections.
>> + */
>> + if (box_raft()->state != RAFT_STATE_LEADER) {
> 1. That is strange. Why do you allow to promote the node
> if it is already the leader when mode is candidate, but do
> not allow the same when the mode is manual?
It's because box_clear_synchro_queue() may be called by raft worker,
once the node becomes leader. So I cannot forbid this.
Actually, there's a guard for manual `box.ctl.clear_synchro_queue()` above,
I just didn't make proper use of it. I mean these lines:
if (!is_box_configured ||
raft_source_term(box_raft(), instance_id) == box_raft()->term)
return 0;
So I don't need the ER_ALREADY_LEADER in ELECTION_MODE_MANUAL case.
Will fix.
Thanks for pointing this out!
>
> Shouldn't we throw an error when the mode is candidate
> regardless of the node role?
>
>> + diag_set(ClientError, ER_UNSUPPORTED, "election_mode="
>> + "'candidate'", "manual elections");
>> + return -1;
>> + }
>> + break;
>> + default:
>> + unreachable();
>> + }
>> +
>> uint32_t former_leader_id = txn_limbo.owner_id;
>> int64_t wait_lsn = txn_limbo.confirmed_lsn;
>> int rc = 0;
>> int quorum = replication_synchro_quorum;
>> in_clear_synchro_queue = true;
>>
>> + if (run_elections) {
>> + /*
>> + * Make this instance a candidate and run until some leader, not
>> + * necessarily this instance, emerges.
>> + */
>> + raft_cfg_is_candidate(box_raft(), true, false);
>> + /*
>> + * Trigger new elections without waiting for an old leader to
>> + * disappear.
>> + */
>> + raft_new_term(box_raft());
>> + box_raft_wait_leader_found();
>> + raft_cfg_is_candidate(box_raft(), false, false);
> 2. What if during box_raft_wait_leader_found() I made the node candidate
> via box.cfg? Won't you then reset it back to non-candidate here?
Yes, that's true.
>
> It probably should reset the current box.cfg mode back. Not just
> remove the candidate flag.
I think it's strange to reconfigure box automatically.
I suggest to reset node to non-candidate only if the mode
remains MANUAL after the election.
>
>> + if (!box_raft()->is_enabled) {
>> + diag_set(ClientError, ER_RAFT_DISABLED);
>> + in_clear_synchro_queue = false;
>> + return -1;
>> + }
>> + if (box_raft()->state != RAFT_STATE_LEADER) {
>> + diag_set(ClientError, ER_INTERFERING_PROMOTE,
>> + box_raft()->leader);
>> + in_clear_synchro_queue = false;
>> + return -1;
>> + }
>> + }
>> +
>> if (txn_limbo_is_empty(&txn_limbo))
>> goto promote;
>>
>> diff --git a/src/box/raft.c b/src/box/raft.c
>> index 285dbe4fd..c7dc79f9b 100644
>> --- a/src/box/raft.c
>> +++ b/src/box/raft.c
>> @@ -336,6 +336,28 @@ fail:
>> panic("Could not write a raft request to WAL\n");
>> }
>>
>> +static int
>> +box_raft_wait_leader_found_trig(struct trigger *trig, void *event)
> 3. I thought we usually call triggers with _f suffix, not _trig.
Sure. Fixed.
>
>> +{
>> + struct raft *raft = (struct raft *)event;
>> + assert(raft == box_raft());
>> + struct fiber *waiter = (struct fiber *)trig->data;
> 4. No need to cast this and event - void * cast works naturally in C.
Ok.
>
>> + if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
>> + fiber_wakeup(waiter);
>> + return 0;
>> +}
>> diff --git a/src/box/raft.h b/src/box/raft.h
>> index 15f4e80d9..8fce423e1 100644
>> --- a/src/box/raft.h
>> +++ b/src/box/raft.h
>> @@ -97,6 +97,9 @@ box_raft_checkpoint_remote(struct raft_request *req);
>> int
>> box_raft_process(struct raft_request *req, uint32_t source);
>>
>> +void
>> +box_raft_wait_leader_found();
>> +
>> void
>> box_raft_init(void);
>>
>> diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
>> index e9ce8cade..7b77e05ea 100644
>> --- a/src/lib/raft/raft.c
>> +++ b/src/lib/raft/raft.c
>> @@ -846,7 +846,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
>> }
>>
>> void
>> -raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
>> +raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote)
> 5. I know it might lead to some code duplication, but probably
> better move that to other functions. For example,
>
> raft_cfg_is_temporary_candidate()
>
> or something like that. Otherwise it appears surprisingly hard
> to follow these 2 flags together. Although I might be wrong and
> it would look worse. Did you try?
>
> Or another option:
>
> raft_cfg_is_candidate(box_raft(), true, false);
> raft_cfg_is_candidate(box_raft(), false, false);
>
> turns into
>
> raft_start_candidate(box_raft())
> raft_stop_candidate(box_raft())
>
> Also it would be good to have unit tests for the changes in raft.h
> and raft.c.
This variant sounds good. I'll implement in in a new commit.
Incremental diff:
==========================================
diff --git a/src/box/box.cc b/src/box/box.cc
index 37938df15..fcd812c09 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1157,8 +1157,7 @@ box_set_election_mode(void)
if (mode == ELECTION_MODE_INVALID)
return -1;
box_election_mode = mode;
- raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE,
- true);
+ raft_cfg_is_candidate(box_raft(), mode == ELECTION_MODE_CANDIDATE);
raft_cfg_is_enabled(box_raft(), mode != ELECTION_MODE_OFF);
return 0;
}
@@ -1534,11 +1533,7 @@ box_clear_synchro_queue(bool try_wait)
"manual elections");
return -1;
case ELECTION_MODE_MANUAL:
- assert(box_raft()->state != RAFT_STATE_CANDIDATE);
- if (box_raft()->state == RAFT_STATE_LEADER) {
- diag_set(ClientError, ER_ALREADY_LEADER);
- return -1;
- }
+ assert(box_raft()->state == RAFT_STATE_FOLLOWER);
run_elections = true;
try_wait = false;
break;
@@ -1569,14 +1564,19 @@ box_clear_synchro_queue(bool try_wait)
* Make this instance a candidate and run until some leader, not
* necessarily this instance, emerges.
*/
- raft_cfg_is_candidate(box_raft(), true, false);
+ raft_start_candidate(box_raft());
/*
* Trigger new elections without waiting for an old leader to
* disappear.
*/
raft_new_term(box_raft());
box_raft_wait_leader_found();
- raft_cfg_is_candidate(box_raft(), false, false);
+ /*
+ * Do not reset raft mode if it was changed while running the
+ * elections.
+ */
+ if (box_election_mode == ELECTION_MODE_MANUAL)
+ raft_stop_candidate(box_raft(), false);
if (!box_raft()->is_enabled) {
diag_set(ClientError, ER_RAFT_DISABLED);
in_clear_synchro_queue = false;
diff --git a/src/box/errcode.h b/src/box/errcode.h
index df36085db..d93820e96 100644
--- a/src/box/errcode.h
+++ b/src/box/errcode.h
@@ -277,7 +277,6 @@ struct errcode_record {
/*222 */_(ER_QUORUM_WAIT, "Couldn't wait for quorum %d: %s") \
/*223 */_(ER_INTERFERING_PROMOTE, "Instance with replica id %u
was promoted first") \
/*224 */_(ER_RAFT_DISABLED, "Elections were turned off
while running box.ctl.promote()")\
- /*225 */_(ER_ALREADY_LEADER, "Can't promote an existing
leader")\
/*
* !IMPORTANT! Please follow instructions at start of the file
diff --git a/src/box/raft.c b/src/box/raft.c
index c7dc79f9b..425353207 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -337,11 +337,11 @@ fail:
}
static int
-box_raft_wait_leader_found_trig(struct trigger *trig, void *event)
+box_raft_wait_leader_found_f(struct trigger *trig, void *event)
{
- struct raft *raft = (struct raft *)event;
+ struct raft *raft = event;
assert(raft == box_raft());
- struct fiber *waiter = (struct fiber *)trig->data;
+ struct fiber *waiter = trig->data;
if (raft->leader != REPLICA_ID_NIL || !raft->is_enabled)
fiber_wakeup(waiter);
return 0;
@@ -351,7 +351,7 @@ void
box_raft_wait_leader_found(void)
{
struct trigger trig;
- trigger_create(&trig, box_raft_wait_leader_found_trig, fiber(), NULL);
+ trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
raft_on_update(box_raft(), &trig);
fiber_yield();
assert(box_raft()->leader != REPLICA_ID_NIL ||
!box_raft()->is_enabled);
diff --git a/src/lib/raft/raft.c b/src/lib/raft/raft.c
index d557c907b..8deb06eb5 100644
--- a/src/lib/raft/raft.c
+++ b/src/lib/raft/raft.c
@@ -846,7 +846,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled)
}
void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote)
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate)
{
raft->is_cfg_candidate = is_candidate;
is_candidate = is_candidate && raft->is_enabled;
diff --git a/src/lib/raft/raft.h b/src/lib/raft/raft.h
index b140ff3ba..69dec63c6 100644
--- a/src/lib/raft/raft.h
+++ b/src/lib/raft/raft.h
@@ -325,7 +325,7 @@ raft_cfg_is_enabled(struct raft *raft, bool is_enabled);
* the node still can vote, when Raft is enabled.
*/
void
-raft_cfg_is_candidate(struct raft *raft, bool is_candidate, bool demote);
+raft_cfg_is_candidate(struct raft *raft, bool is_candidate);
/**
* Make the instance a candidate.
diff --git a/test/box/error.result b/test/box/error.result
index dad6a21d3..cc8cbaaa9 100644
--- a/test/box/error.result
+++ b/test/box/error.result
@@ -443,7 +443,6 @@ t;
| 222: box.error.QUORUM_WAIT
| 223: box.error.INTERFERING_PROMOTE
| 224: box.error.RAFT_DISABLED
- | 225: box.error.ALREADY_LEADER
| ...
test_run:cmd("setopt delimiter ''");
diff --git a/test/unit/raft_test_utils.c b/test/unit/raft_test_utils.c
index a10ccae6a..b8735f373 100644
--- a/test/unit/raft_test_utils.c
+++ b/test/unit/raft_test_utils.c
@@ -360,7 +360,7 @@ raft_node_start(struct raft_node *node)
raft_process_recovery(&node->raft, &node->journal.rows[i]);
raft_cfg_is_enabled(&node->raft, node->cfg_is_enabled);
- raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate, true);
+ raft_cfg_is_candidate(&node->raft, node->cfg_is_candidate);
raft_cfg_election_timeout(&node->raft, node->cfg_election_timeout);
raft_cfg_election_quorum(&node->raft, node->cfg_election_quorum);
raft_cfg_death_timeout(&node->raft, node->cfg_death_timeout);
@@ -402,7 +402,7 @@ raft_node_cfg_is_candidate(struct raft_node *node,
bool value)
{
node->cfg_is_candidate = value;
if (raft_node_is_started(node)) {
- raft_cfg_is_candidate(&node->raft, value, true);
+ raft_cfg_is_candidate(&node->raft, value);
raft_run_async_work();
}
}
--
Serge Petrenko
More information about the Tarantool-patches
mailing list