[Tarantool-patches] [PATCH v4 12/16] box: fix an assertion failure after a spurious wakeup in promote

Serge Petrenko sergepetrenko at tarantool.org
Wed Jul 14 21:25:40 MSK 2021


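box_raft_wait_leader_found() used to yield exactly once and then assert
that either a leader had been found or Raft had been disabled. A spurious
wakeup of the waiting fiber (for example, fiber:wakeup() on a fiber blocked
in box.ctl.promote(), as the new test does) broke that assumption and
triggered the assertion.

Keep yielding until Raft is disabled, the fiber is cancelled, or a leader
appears, and drop the assertion, which is no longer needed.

Follow-up #3055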
---
 src/box/raft.c                                |  8 +++-
 .../gh-3055-promote-wakeup-crash.result       | 43 +++++++++++++++++++
 .../gh-3055-promote-wakeup-crash.test.lua     | 20 +++++++++
 test/replication/suite.cfg                    |  1 +
 4 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 test/replication/gh-3055-promote-wakeup-crash.result
 create mode 100644 test/replication/gh-3055-promote-wakeup-crash.test.lua

diff --git a/src/box/raft.c b/src/box/raft.c
index b04932cd9..d16ec952a 100644
--- a/src/box/raft.c
+++ b/src/box/raft.c
@@ -344,13 +344,17 @@ box_raft_wait_leader_found(void)
 	struct trigger trig;
 	trigger_create(&trig, box_raft_wait_leader_found_f, fiber(), NULL);
 	raft_on_update(box_raft(), &trig);
-	fiber_yield();
+
+	do {
+		fiber_yield();
+	} while (box_raft()->is_enabled && !fiber_is_cancelled() &&
+		 box_raft()->leader == REPLICA_ID_NIL);
+
 	trigger_clear(&trig);
 	if (fiber_is_cancelled()) {
 		diag_set(FiberIsCancelled);
 		return -1;
 	}
-	assert(box_raft()->leader != REPLICA_ID_NIL || !box_raft()->is_enabled);
 	return 0;
 }
 
diff --git a/test/replication/gh-3055-promote-wakeup-crash.result b/test/replication/gh-3055-promote-wakeup-crash.result
new file mode 100644
index 000000000..e508611e5
--- /dev/null
+++ b/test/replication/gh-3055-promote-wakeup-crash.result
@@ -0,0 +1,43 @@
+-- test-run result file version 2
+test_run = require('test_run').new()
+ | ---
+ | ...
+--
+-- gh-3055 follow-up: box.ctl.promote() could crash on an assertion after a
+-- spurious wakeup.
+--
+_ = box.space._cluster:insert{2, require('uuid').str()}
+ | ---
+ | ...
+box.cfg{election_mode='manual',\
+        replication_synchro_quorum=2,\
+        election_timeout=1000}
+ | ---
+ | ...
+
+fiber = require('fiber')
+ | ---
+ | ...
+f = fiber.create(function() box.ctl.promote() end)
+ | ---
+ | ...
+f:set_joinable(true)
+ | ---
+ | ...
+f:wakeup()
+ | ---
+ | ...
+fiber.yield()
+ | ---
+ | ...
+
+-- Cleanup.
+f:cancel()
+ | ---
+ | ...
+box.cfg{election_mode='off'}
+ | ---
+ | ...
+test_run:cleanup_cluster()
+ | ---
+ | ...
diff --git a/test/replication/gh-3055-promote-wakeup-crash.test.lua b/test/replication/gh-3055-promote-wakeup-crash.test.lua
new file mode 100644
index 000000000..2ac901b08
--- /dev/null
+++ b/test/replication/gh-3055-promote-wakeup-crash.test.lua
@@ -0,0 +1,20 @@
+test_run = require('test_run').new()
+--
+-- gh-3055 follow-up: box.ctl.promote() could crash on an assertion after a
+-- spurious wakeup.
+--
+_ = box.space._cluster:insert{2, require('uuid').str()}
+box.cfg{election_mode='manual',\
+        replication_synchro_quorum=2,\
+        election_timeout=1000}
+
+fiber = require('fiber')
+f = fiber.create(function() box.ctl.promote() end)
+f:set_joinable(true)
+f:wakeup()
+fiber.yield()
+
+-- Cleanup.
+f:cancel()
+box.cfg{election_mode='off'}
+test_run:cleanup_cluster()
diff --git a/test/replication/suite.cfg b/test/replication/suite.cfg
index 7f9014b22..8b2204e2a 100644
--- a/test/replication/suite.cfg
+++ b/test/replication/suite.cfg
@@ -3,6 +3,7 @@
     "anon_register_gap.test.lua": {},
     "gh-2991-misc-asserts-on-update.test.lua": {},
     "gh-3055-election-promote.test.lua": {},
+    "gh-3055-promote-wakeup-crash.test.lua": {},
     "gh-3111-misc-rebootstrap-from-ro-master.test.lua": {},
     "gh-3160-misc-heartbeats-on-master-changes.test.lua": {},
     "gh-3247-misc-iproto-sequence-value-not-replicated.test.lua": {},
-- 
2.30.1 (Apple Git-130)



More information about the Tarantool-patches mailing list