[Tarantool-patches] [PATCH v4 12/16] box: fix an assertion failure after a spurious wakeup in promote
Serge Petrenko
sergepetrenko at tarantool.org
Wed Jul 14 21:25:40 MSK 2021
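box_raft_wait_leader_found() yielded exactly once and then asserted that
either a leader had been found or Raft had been disabled. A spurious
wakeup of the waiting fiber (e.g. an explicit fiber:wakeup() while
box.ctl.promote() is in progress) broke that assumption and triggered the
assertion.

Keep yielding until Raft is disabled, the fiber is cancelled, or a leader
appears, and drop the assertion.

Follow-up #3055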
---
src/box/raft.c | 8 +++-
.../gh-3055-promote-wakeup-crash.result | 43 +++++++++++++++++++
.../gh-3055-promote-wakeup-crash.test.lua | 20 +++++++++
test/replication/suite.cfg | 1 +
4 files changed, 70 insertions(+), 2 deletions(-)
create mode 100644 test/replication/gh-3055-promote-wakeup-crash.result
create mode 100644 test/replication/gh-3055-promote-wakeup-crash.test.lua
diff --git a/src/box/raft.c b/src/box/raft.c
index b04932cd9..d16ec952a 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -344,13 +344,17 @@ box_raft_wait_leader_found(void)
struct trigger trig;
trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
raft_on_update(box_raft(), &trig);
- fiber_yield();
+
+ do {
+ fiber_yield();
+ } while (box_raft()->is_enabled && !fiber_is_cancelled() &&
+ box_raft()->leader == REPLICA_ID_NIL);
+
trigger_clear(&trig);
if (fiber_is_cancelled()) {
diag_set(FiberIsCancelled);
return -1;
}
- assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
return 0;
}
diff --git a/test/replication/gh-3055-promote-wakeup-crash.result b/test/replication/gh-3055-promote-wakeup-crash.result
new file mode 100644
index 000000000..e508611e5
--- /dev/null
+++ b/test/replication/gh-3055-promote-wakeup-crash.result
@@ -0,0 +1,43 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+--
+-- gh-3055 follow-up: box.ctl.promote() could crash on an assertion after a
+-- spurious wakeup.
+--
+_ = box.space._cluster:insert{2, require('uuid').str()}
+ | ---
+ | ...
+box.cfg{election_mode='manual',\
+ replication_synchro_quorum=2,\
+ election_timeout=1000}
+ | ---
+ | ...
+
+fiber = require('fiber')
+ | ---
+ | ...
+f = fiber.create(function() box.ctl.promote() end)
+ | ---
+ | ...
+f:set_joinable(true)
+ | ---
+ | ...
+f:wakeup()
+ | ---
+ | ...
+fiber.yield()
+ | ---
+ | ...
+
+-- Cleanup.
+f:cancel()
+ | ---
+ | ...
+box.cfg{election_mode='off'}
+ | ---
+ | ...
+test_run:cleanup_cluster()
+ | ---
+ | ...
diff --git a/test/replication/gh-3055-promote-wakeup-crash.test.lua b/test/replication/gh-3055-promote-wakeup-crash.test.lua
new file mode 100644
index 000000000..2ac901b08
--- /dev/null
+++ b/test/replication/gh-3055-promote-wakeup-crash.test.lua
@@ -0,0 +1,20 @@
+test_run = require('test_run').new()
+--
+-- gh-3055 follow-up: box.ctl.promote() could crash on an assertion after a
+-- spurious wakeup.
+--
+_ = box.space._cluster:insert{2, require('uuid').str()}
+box.cfg{election_mode='manual',\
+ replication_synchro_quorum=2,\
+ election_timeout=1000}
+
+fiber = require('fiber')
+f = fiber.create(function() box.ctl.promote() end)
+f:set_joinable(true)
+f:wakeup()
+fiber.yield()
+
+-- Cleanup.
+f:cancel()
+box.cfg{election_mode='off'}
+test_run:cleanup_cluster()
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 7f9014b22..8b2204e2a 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -3,6 +3,7 @@
"anon_register_gap.test.lua": {},
"gh-2991-misc-asserts-on-update.test.lua": {},
"gh-3055-election-promote.test.lua": {},
+ "gh-3055-promote-wakeup-crash.test.lua": {},
"gh-3111-misc-rebootstrap-from-ro-master.test.lua": {},
"gh-3160-misc-heartbeats-on-master-changes.test.lua": {},
"gh-3247-misc-iproto-sequence-value-not-replicated.test.lua": {},
--
2.30.1 (Apple Git-130)
More information about the Tarantool-patches
mailing list