[Tarantool-patches] [PATCH v3 04/12] box: make promote always bump the term

Serge Petrenko sergepetrenko at tarantool.org
Tue Jun 29 01:12:50 MSK 2021


When elections are disabled, repeated promote calls resulted in
multiple PROMOTE entries for the same term. This is wrong, because
every promotion for a given term except the first one is ignored
as already seen. Make promote always bump the term before writing
the PROMOTE entry.

Part-of #6034
---
 src/box/box.cc                                | 13 ++++---
 src/box/raft.c                                | 36 ++++++++++++++++++
 src/box/raft.h                                |  4 ++
 .../gh-4114-local-space-replication.result    |  7 ++--
 .../gh-4114-local-space-replication.test.lua  |  4 +-
 .../gh-6034-promote-bump-term.result          | 37 +++++++++++++++++++
 .../gh-6034-promote-bump-term.test.lua        | 16 ++++++++
 test/replication/suite.cfg                    |  1 +
 8 files changed, 107 insertions(+), 11 deletions(-)
 create mode 100644 test/replication/gh-6034-promote-bump-term.result
 create mode 100644 test/replication/gh-6034-promote-bump-term.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index 6a0950f44..ce37b307d 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -1687,16 +1687,19 @@ box_promote(void)
 			rc = -1;
 		} else {
 promote:
-			/* We cannot possibly get here in a volatile state. */
-			assert(box_raft()->volatile_term == box_raft()->term);
-			txn_limbo_write_promote(&txn_limbo, wait_lsn,
-						box_raft()->term);
+			if (try_wait) {
+				raft_new_term(box_raft());
+				if (box_raft_wait_persisted() < 0)
+					return -1;
+			}
+			uint64_t term = box_raft()->term;
+			txn_limbo_write_promote(&txn_limbo, wait_lsn, term);
 			struct synchro_request req = {
 				.type = IPROTO_PROMOTE,
 				.replica_id = former_leader_id,
 				.origin_id = instance_id,
 				.lsn = wait_lsn,
-				.term = box_raft()->term,
+				.term = term,
 			};
 			txn_limbo_process(&txn_limbo, &req);
 			assert(txn_limbo_is_empty(&txn_limbo));
diff --git a/src/box/raft.c b/src/box/raft.c
index 7f787c0c5..17caf6f54 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -354,6 +354,42 @@ box_raft_wait_leader_found(void)
 	return 0;
 }
 
+struct raft_wait_persisted_data {
+	struct fiber *waiter;
+	uint64_t term;
+};
+
+static int
+box_raft_wait_persisted_f(struct trigger *trig, void *event)
+{
+	struct raft *raft = event;
+	struct raft_wait_persisted_data *data = trig->data;
+	if (raft->term >= data->term)
+		fiber_wakeup(data->waiter);
+	return 0;
+}
+
+int
+box_raft_wait_persisted(void)
+{
+	if (box_raft()->term == box_raft()->volatile_term)
+		return 0;
+	struct raft_wait_persisted_data data = {
+		.waiter = fiber(),
+		.term = box_raft()->volatile_term,
+	};
+	struct trigger trig;
+	trigger_create(&trig, box_raft_wait_persisted_f, &data, NULL);
+	raft_on_update(box_raft(), &trig);
+	fiber_yield();
+	trigger_clear(&trig);
+	if (fiber_is_cancelled()) {
+		diag_set(FiberIsCancelled);
+		return -1;
+	}
+	return 0;
+}
+
 void
 box_raft_init(void)
 {
diff --git a/src/box/raft.h b/src/box/raft.h
index 6b6136510..6e27b098f 100644
--- a/src/box/raft.h
+++ b/src/box/raft.h
@@ -101,6 +101,10 @@ box_raft_process(struct raft_request *req, uint32_t source);
 int
 box_raft_wait_leader_found();
 
+/** Block this fiber until the current volatile term is persisted. */
+int
+box_raft_wait_persisted(void);
+
 void
 box_raft_init(void);
 
diff --git a/test/replication/gh-4114-local-space-replication.result b/test/replication/gh-4114-local-space-replication.result
index 9b63a4b99..e71eb60a8 100644
--- a/test/replication/gh-4114-local-space-replication.result
+++ b/test/replication/gh-4114-local-space-replication.result
@@ -45,9 +45,8 @@ test_run:cmd('switch replica')
  | ---
  | - true
  | ...
-box.info.vclock[0]
+a = box.info.vclock[0] or 0
  | ---
- | - null
  | ...
 box.cfg{checkpoint_count=1}
  | ---
@@ -77,9 +76,9 @@ box.space.test:insert{3}
  | - [3]
  | ...
 
-box.info.vclock[0]
+assert(box.info.vclock[0] == a + 3)
  | ---
- | - 3
+ | - true
  | ...
 
 test_run:cmd('switch default')
diff --git a/test/replication/gh-4114-local-space-replication.test.lua b/test/replication/gh-4114-local-space-replication.test.lua
index c18fb3b10..65fef3bf6 100644
--- a/test/replication/gh-4114-local-space-replication.test.lua
+++ b/test/replication/gh-4114-local-space-replication.test.lua
@@ -18,7 +18,7 @@ for i = 1,10 do box.space.test:insert{i} end
 box.info.vclock[0] == a + 10 or box.info.vclock[0] - a
 
 test_run:cmd('switch replica')
-box.info.vclock[0]
+a = box.info.vclock[0] or 0
 box.cfg{checkpoint_count=1}
 box.space.test:select{}
 box.space.test:insert{1}
@@ -27,7 +27,7 @@ box.space.test:insert{2}
 box.snapshot()
 box.space.test:insert{3}
 
-box.info.vclock[0]
+assert(box.info.vclock[0] == a + 3)
 
 test_run:cmd('switch default')
 
diff --git a/test/replication/gh-6034-promote-bump-term.result b/test/replication/gh-6034-promote-bump-term.result
new file mode 100644
index 000000000..20e352922
--- /dev/null
+++ b/test/replication/gh-6034-promote-bump-term.result
@@ -0,0 +1,37 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+
+-- gh-6034: test that every box.ctl.promote() bumps
+-- the instance's term. Even when elections are disabled. Even for consequent
+-- promotes on the same instance.
+election_mode = box.cfg.election_mode
+ | ---
+ | ...
+box.cfg{election_mode='off'}
+ | ---
+ | ...
+
+term = box.info.election.term
+ | ---
+ | ...
+box.ctl.promote()
+ | ---
+ | ...
+assert(box.info.election.term == term + 1)
+ | ---
+ | - true
+ | ...
+box.ctl.promote()
+ | ---
+ | ...
+assert(box.info.election.term == term + 2)
+ | ---
+ | - true
+ | ...
+
+-- Cleanup.
+box.cfg{election_mode=election_mode}
+ | ---
+ | ...
diff --git a/test/replication/gh-6034-promote-bump-term.test.lua b/test/replication/gh-6034-promote-bump-term.test.lua
new file mode 100644
index 000000000..5847dbb8f
--- /dev/null
+++ b/test/replication/gh-6034-promote-bump-term.test.lua
@@ -0,0 +1,16 @@
+test_run = require('test_run').new()
+
+-- gh-6034: test that every box.ctl.promote() bumps
+-- the instance's term. Even when elections are disabled. Even for consequent
+-- promotes on the same instance.
+election_mode = box.cfg.election_mode
+box.cfg{election_mode='off'}
+
+term = box.info.election.term
+box.ctl.promote()
+assert(box.info.election.term == term + 1)
+box.ctl.promote()
+assert(box.info.election.term == term + 2)
+
+-- Cleanup.
+box.cfg{election_mode=election_mode}
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index c4b3fbd9c..496b2e104 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -48,6 +48,7 @@
     "gh-5613-bootstrap-prefer-booted.test.lua": {},
     "gh-6027-applier-error-show.test.lua": {},
     "gh-6032-promote-wal-write.test.lua": {},
+    "gh-6034-promote-bump-term.test.lua": {},
     "gh-6057-qsync-confirm-async-no-wal.test.lua": {},
     "gh-6094-rs-uuid-mismatch.test.lua": {},
     "gh-6127-election-join-new.test.lua": {},
-- 
2.30.1 (Apple Git-130)



More information about the Tarantool-patches mailing list