[Tarantool-patches] [PATCH v3 07/12] box: introduce `box.ctl.demote`
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Thu Jul 22 02:28:32 MSK 2021
Thanks for the fixes!
>> 2. On top of this commit and on top of the branch too I tried to
>> promote a candidate and got a strange error in the logs, although
>> the promotion was successful:
>>
>> --
>> -- Instance 1
>> --
>>
>> -- Step 1
>> box.cfg{
>> listen = 3313,
>> election_mode = 'candidate',
>> replication_synchro_quorum = 2,
>> replication = {'localhost:3314', 'localhost:3313'}
>> }
>> box.schema.user.grant('guest', 'super')
>>
>>
>> --
>> -- Instance 2
>> --
>>
>> -- Step 2
>> box.cfg{
>> listen = 3314,
>> election_mode = 'voter',
>> replication_synchro_quorum = 2,
>> replication = {'localhost:3314', 'localhost:3313'},
>> read_only = true,
>> }
>>
>> -- Step 3
>> box.cfg{read_only = false, election_mode = 'candidate'}
>>
>> -- Step 4
>> box.ctl.promote()
>>
>> main/112/raft_worker box.cc:1538 E> ER_UNSUPPORTED: box.ctl.promote does not support simultaneous invocations
>> ---
>> ...
>
> That's because once a candidate becomes the leader, it tries to issue `box.ctl.promote()`, and fails,
> since we're already in `box.ctl.promote()` call.
> I'm not sure how to handle that properly. This doesn't break anything though.
Still, the error message looks really not good. There is an option -
make box_promote() for candidate node just call raft_promote() and set
box_in_promote = false. Then wait for the term outcome. Will it work?
You would need to rebase your patchset on master branch then.
It might be a little easier to do if you apply the diff below. (Warning:
I didn't test it.) The motivation is that one of the main reasons why I
wanted box_promote() simplified was because of the strange meaning of some
flags. In particular, try_wait flag somewhy triggered elections before the
waiting which is super not obvious why. How does 'wait' come to 'elections'?
In the diff I tried to remove these flags entirely. And now you have a
single place in the code of box_promote(), where ELECTION_MODE_CANDIDATE
stuff is handled. Here you could try the proposal I gave above.
====================
diff --git a/src/box/box.cc b/src/box/box.cc
index f68fffcab..e7765b657 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1698,34 +1698,6 @@ box_issue_demote(uint32_t prev_leader_id, int64_t promote_lsn)
assert(txn_limbo_is_empty(&txn_limbo));
}
-/**
- * Check whether this instance may run a promote() and set promote parameters
- * according to its election mode.
- */
-static int
-box_check_promote_election_mode(bool *try_wait, bool *run_elections)
-{
- switch (box_election_mode) {
- case ELECTION_MODE_OFF:
- if (try_wait != NULL)
- *try_wait = true;
- break;
- case ELECTION_MODE_VOTER:
- assert(box_raft()->state == RAFT_STATE_FOLLOWER);
- diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
- "manual elections");
- return -1;
- case ELECTION_MODE_MANUAL:
- case ELECTION_MODE_CANDIDATE:
- if (run_elections != NULL)
- *run_elections = box_raft()->state != RAFT_STATE_LEADER;
- break;
- default:
- unreachable();
- }
- return 0;
-}
-
/* A guard to block multiple simultaneous promote()/demote() invocations. */
static bool box_in_promote = false;
@@ -1757,27 +1729,35 @@ box_promote(void)
if (is_leader)
return 0;
- bool run_elections = false;
- bool try_wait = false;
-
- if (box_check_promote_election_mode(&try_wait, &run_elections) < 0)
- return -1;
-
- int64_t wait_lsn = -1;
-
- if (run_elections && box_run_elections() < 0)
- return -1;
- if (try_wait) {
- if (box_try_wait_confirm(2 * replication_synchro_timeout) < 0)
+ switch (box_election_mode) {
+ case ELECTION_MODE_OFF:
+ if (box_try_wait_confirm(2 * replication_synchro_timeout) != 0)
+ return -1;
+ if (box_trigger_elections() != 0)
return -1;
- if (box_trigger_elections() < 0)
+ break;
+ case ELECTION_MODE_VOTER:
+ assert(box_raft()->state == RAFT_STATE_FOLLOWER);
+ diag_set(ClientError, ER_UNSUPPORTED, "election_mode='voter'",
+ "manual elections");
+ return -1;
+ case ELECTION_MODE_MANUAL:
+ case ELECTION_MODE_CANDIDATE:
+ if (box_raft()->state != RAFT_STATE_LEADER &&
+ box_run_elections() != 0)
return -1;
+ break;
+ default:
+ unreachable();
}
- if ((wait_lsn = box_wait_limbo_acked()) < 0)
+ if (box_check_promote_election_mode(&try_wait, &run_elections) < 0)
return -1;
- box_issue_promote(txn_limbo.owner_id, wait_lsn);
+ int64_t wait_lsn = box_wait_limbo_acked();
+ if (wait_lsn < 0)
+ return -1;
+ box_issue_promote(txn_limbo.owner_id, wait_lsn);
return 0;
}
@@ -1804,29 +1784,16 @@ box_demote(void)
is_leader = is_leader && box_raft()->state == RAFT_STATE_LEADER;
if (!is_leader)
return 0;
-
- bool try_wait = false;
-
- if (box_check_promote_election_mode(&try_wait, NULL) < 0)
- return -1;
-
- int64_t wait_lsn = -1;
-
if (box_trigger_elections() < 0)
return -1;
-
if (box_election_mode != ELECTION_MODE_OFF)
return 0;
-
- if (try_wait &&
- box_try_wait_confirm(2 * replication_synchro_timeout) < 0)
+ if (box_try_wait_confirm(2 * replication_synchro_timeout) != 0)
return -1;
-
- if ((wait_lsn = box_wait_limbo_acked()) < 0)
+ int64_t wait_lsn = box_wait_limbo_acked();
+ if (wait_lsn < 0)
return -1;
-
box_issue_demote(txn_limbo.owner_id, wait_lsn);
-
return 0;
}
More information about the Tarantool-patches
mailing list