[Tarantool-patches] [PATCH 1/2] fiber: make wakeup in Lua and C nop on self
Vladislav Shpilevoy
v.shpilevoy at tarantool.org
Sat Apr 24 02:15:53 MSK 2021
fiber.wakeup() in Lua and fiber_wakeup() in C could lead to a
crash or undefined behaviour when called on the currently running
fiber.
In particular, if after wakeup was started a new fiber in a
blocking way (fiber.create() and fiber_start()) it would crash in
debug build, and lead to unknown results in release.
If after wakeup was made a sleep with non-zero timeout or an
infinite yield (fiber_yield()), the fiber woke up in the same
event loop iteration regardless of any timeout or other wakeups.
It was a spurious wakeup, which is not expected in most of the
places internally.
The patch makes the wakeup nop on the current fiber making it safe
to use anywhere.
Closes #5292
Closes #6043
@TarantoolBot document
Title: fiber.wakeup() in Lua and fiber_wakeup() in C are nop on self
In Lua `fiber.wakeup()` being called on the current fiber does not
do anything, safe to use. The same for `fiber_wakeup()` in C.
---
.../unreleased/gh-6043-fiber-wakeup-self.md | 33 +++++++
src/lib/core/fiber.c | 85 +++++++++++--------
src/lib/core/fiber.h | 6 +-
.../gh-6043-fiber-wakeup-self.test.lua | 50 +++++++++++
test/unit/fiber.cc | 33 ++++++-
test/unit/fiber.result | 2 +
test/unit/fiber_stress.cc | 6 +-
7 files changed, 171 insertions(+), 44 deletions(-)
create mode 100644 changelogs/unreleased/gh-6043-fiber-wakeup-self.md
create mode 100755 test/app-tap/gh-6043-fiber-wakeup-self.test.lua
diff --git a/changelogs/unreleased/gh-6043-fiber-wakeup-self.md b/changelogs/unreleased/gh-6043-fiber-wakeup-self.md
new file mode 100644
index 000000000..5869e2696
--- /dev/null
+++ b/changelogs/unreleased/gh-6043-fiber-wakeup-self.md
@@ -0,0 +1,33 @@
+## bugfix/core
+
+* **[Breaking change]** `fiber.wakeup()` in Lua and `fiber_wakeup()` in C became
+ NOP on the currently running fiber. Previously they allowed to "ignore" the
+ next yield or sleep leading to unexpected spurious wakeups. Could lead to a
+ crash (in debug build) or undefined behaviour (in release build) if called
+ right before `fiber.create()` in Lua or `fiber_start()` in C (gh-6043).
+
+ There was a single usecase for that - reschedule in the same event loop
+ iteration which is not the same as `fiber.sleep(0)` in Lua and
+ `fiber_sleep(0)` in C. Could be done in C like that:
+ ```C
+ fiber_wakeup(fiber_self());
+ fiber_yield();
+ ```
+ and in Lua like that:
+ ```Lua
+ fiber.self():wakeup()
+ fiber.yield()
+ ```
+ Now to get the same effect in C use `fiber_reschedule()`. In Lua it is now
+ simply impossible to reschedule the current fiber in the same event loop
+ iteration directly. But still can reschedule self through a second fiber like
+ this (**never use it, please**):
+ ```Lua
+ local self = fiber.self()
+ fiber.new(function() self:wakeup() end)
+ fiber.sleep(0)
+ ```
+
+----
+Breaking change: `fiber.wakeup()` in Lua and `fiber_wakeup()` in C became NOP on
+the currently running fiber.
diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index f8b85d99d..cb6295171 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -396,10 +396,13 @@ fiber_call_impl(struct fiber *callee)
assert(rlist_empty(&callee->state));
assert(caller);
assert(caller != callee);
+ assert((caller->flags & FIBER_IS_RUNNING) != 0);
+ assert((callee->flags & FIBER_IS_RUNNING) == 0);
+ caller->flags &= ~FIBER_IS_RUNNING;
cord->fiber = callee;
+ callee->flags = (callee->flags & ~FIBER_IS_READY) | FIBER_IS_RUNNING;
- callee->flags &= ~FIBER_IS_READY;
ASAN_START_SWITCH_FIBER(asan_state, 1,
callee->stack,
callee->stack_size);
@@ -443,19 +446,9 @@ fiber_checkstack(void)
return false;
}
-/**
- * Interrupt a synchronous wait of a fiber inside the event loop.
- * We do so by keeping an "async" event in every fiber, solely
- * for this purpose, and raising this event here.
- *
- * @note: if this is sent to self, followed by a fiber_yield()
- * call, it simply reschedules the fiber after other ready
- * fibers in the same event loop iteration.
- */
-void
-fiber_wakeup(struct fiber *f)
+static void
+fiber_make_ready(struct fiber *f)
{
- assert(! (f->flags & FIBER_IS_DEAD));
/**
* Do nothing if the fiber is already in cord->ready
* list *or* is in the call chain created by
@@ -464,21 +457,12 @@ fiber_wakeup(struct fiber *f)
* but the same game is deadly when the fiber is in
* the callee list created by fiber_schedule_list().
*
- * To put it another way, fiber_wakeup() is a 'request' to
+ * To put it another way, fiber_make_ready() is a 'request' to
* schedule the fiber for execution, and once it is executing
- * a wakeup request is considered complete and it must be
+ * the 'make ready' request is considered complete and it must be
* removed.
- *
- * A dead fiber can be lingering in the cord fiber list
- * if it is joinable. This makes it technically possible
- * to schedule it. We would never make such a mistake
- * in our own code, hence the assert above. But as long
- * as fiber.wakeup() is a part of public Lua API, an
- * external rock can mess things up. Ignore such attempts
- * as well.
*/
- if (f->flags & (FIBER_IS_READY | FIBER_IS_DEAD))
- return;
+ assert((f->flags & (FIBER_IS_DEAD | FIBER_IS_READY)) == 0);
struct cord *cord = cord();
if (rlist_empty(&cord->ready)) {
/*
@@ -503,6 +487,23 @@ fiber_wakeup(struct fiber *f)
f->flags |= FIBER_IS_READY;
}
+void
+fiber_wakeup(struct fiber *f)
+{
+ /*
+ * DEAD is checked both in the assertion and in the release build
+ * because it should not ever happen, at least internally. But in some
+ * user modules it might happen, and better ignore such fibers.
+ * Especially since this was allowed for quite some time in the public
+ * API and need to keep it if it costs nothing, for backward
+ * compatibility.
+ */
+ assert((f->flags & FIBER_IS_DEAD) == 0);
+ const int no_flags = FIBER_IS_READY | FIBER_IS_DEAD | FIBER_IS_RUNNING;
+ if ((f->flags & no_flags) == 0)
+ fiber_make_ready(f);
+}
+
/** Cancel the subject fiber.
*
* Note: cancelation is asynchronous. Use fiber_join() to wait for the
@@ -521,7 +522,6 @@ void
fiber_cancel(struct fiber *f)
{
assert(f->fid != 0);
- struct fiber *self = fiber();
/**
* Do nothing if the fiber is dead, since cancelling
* the fiber would clear the diagnostics area and
@@ -535,10 +535,8 @@ fiber_cancel(struct fiber *f)
/**
* Don't wake self and zombies.
*/
- if (f != self) {
- if (f->flags & FIBER_IS_CANCELLABLE)
- fiber_wakeup(f);
- }
+ if ((f->flags & FIBER_IS_CANCELLABLE) != 0)
+ fiber_wakeup(f);
}
/**
@@ -602,7 +600,14 @@ fiber_clock64(void)
void
fiber_reschedule(void)
{
- fiber_wakeup(fiber());
+ struct fiber *f = fiber();
+ /*
+ * The current fiber can't be dead, the flag is set when the fiber
+ * function returns. Can't be ready, because such status is assigned
+ * only to the queued fibers in the ready-list.
+ */
+ assert((f->flags & (FIBER_IS_READY | FIBER_IS_DEAD)) == 0);
+ fiber_make_ready(f);
fiber_yield();
}
@@ -686,8 +691,13 @@ fiber_yield(void)
assert(callee->flags & FIBER_IS_READY || callee == &cord->sched);
assert(! (callee->flags & FIBER_IS_DEAD));
+ assert((caller->flags & FIBER_IS_RUNNING) != 0);
+ assert((callee->flags & FIBER_IS_RUNNING) == 0);
+
+ caller->flags &= ~FIBER_IS_RUNNING;
cord->fiber = callee;
- callee->flags &= ~FIBER_IS_READY;
+ callee->flags = (callee->flags & ~FIBER_IS_READY) | FIBER_IS_RUNNING;
+
ASAN_START_SWITCH_FIBER(asan_state,
(caller->flags & FIBER_IS_DEAD) == 0,
callee->stack,
@@ -748,10 +758,6 @@ fiber_sleep(double delay)
if (delay == 0) {
ev_idle_start(loop(), &cord()->idle_event);
}
- /*
- * We don't use fiber_wakeup() here to ensure there is
- * no infinite wakeup loop in case of fiber_sleep(0).
- */
fiber_yield_timeout(delay);
if (delay == 0) {
@@ -864,7 +870,11 @@ fiber_reset(struct fiber *fiber)
{
rlist_create(&fiber->on_yield);
rlist_create(&fiber->on_stop);
- fiber->flags = FIBER_DEFAULT_FLAGS;
+ /*
+ * Preserve the running flag if set. Reset might be called on the
+ * current fiber when it is recycled.
+ */
+ fiber->flags = FIBER_DEFAULT_FLAGS | (fiber->flags & FIBER_IS_RUNNING);
#if ENABLE_FIBER_TOP
clock_stat_reset(&fiber->clock_stat);
#endif /* ENABLE_FIBER_TOP */
@@ -1449,6 +1459,7 @@ cord_create(struct cord *cord, const char *name)
cord->sched.name = NULL;
fiber_set_name(&cord->sched, "sched");
cord->fiber = &cord->sched;
+ cord->sched.flags |= FIBER_IS_RUNNING;
cord->max_fid = FIBER_ID_MAX_RESERVED;
/*
diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
index b9eeef697..586bc9433 100644
--- a/src/lib/core/fiber.h
+++ b/src/lib/core/fiber.h
@@ -162,6 +162,10 @@ enum {
* This flag is set when fiber uses custom stack size.
*/
FIBER_CUSTOM_STACK = 1 << 5,
+ /**
+ * The flag is set for the fiber currently being executed by the cord.
+ */
+ FIBER_IS_RUNNING = 1 << 6,
FIBER_DEFAULT_FLAGS = FIBER_IS_CANCELLABLE
};
@@ -279,7 +283,7 @@ API_EXPORT void
fiber_start(struct fiber *callee, ...);
/**
- * Interrupt a synchronous wait of a fiber
+ * Interrupt a synchronous wait of a fiber. Nop for the currently running fiber.
*
* \param f fiber to be woken up
*/
diff --git a/test/app-tap/gh-6043-fiber-wakeup-self.test.lua b/test/app-tap/gh-6043-fiber-wakeup-self.test.lua
new file mode 100755
index 000000000..082c1228d
--- /dev/null
+++ b/test/app-tap/gh-6043-fiber-wakeup-self.test.lua
@@ -0,0 +1,50 @@
+#!/usr/bin/env tarantool
+
+local tap = require('tap')
+local fiber = require('fiber')
+
+--
+-- gh-6043: fiber.wakeup() on self could lead to a crash if followed by
+-- fiber.create(). Also it could lead to spurious wakeup if followed by a sleep
+-- with non-zero timeout, or by, for example, an infinite yield such as WAL
+-- write.
+--
+
+local function test_wakeup_self_and_call(test)
+ test:plan(3)
+
+ local self = fiber.self()
+ fiber.wakeup(self)
+ local count = 0
+ local f = fiber.create(function()
+ count = count + 1
+ fiber.yield()
+ count = count + 1
+ end)
+ test:is(count, 1, 'created a fiber')
+ fiber.yield()
+ test:is(count, 2, 'it yielded')
+ test:is(f:status(), 'dead', 'and ended')
+end
+
+local function test_wakeup_self_and_wal_write(test)
+ test:plan(1)
+
+ local s = box.schema.create_space('test')
+ s:create_index('pk')
+
+ fiber.wakeup(fiber.self())
+ local lsn = box.info.lsn
+ s:replace{1}
+ test:is(box.info.lsn, lsn + 1, 'written to WAL')
+ s:drop()
+end
+
+box.cfg{}
+
+local test = tap.test('gh-6043-fiber-wakeup-self')
+test:plan(2)
+test:test('wakeup self + call', test_wakeup_self_and_call)
+test:test('wakeup self + wal write', test_wakeup_self_and_wal_write)
+
+os.exit(test:check() and 0 or 1)
diff --git a/test/unit/fiber.cc b/test/unit/fiber.cc
index 9c1a23bdd..a1e3ded87 100644
--- a/test/unit/fiber.cc
+++ b/test/unit/fiber.cc
@@ -138,8 +138,7 @@ fiber_join_test()
fiber_set_joinable(fiber, true);
fiber_wakeup(fiber);
/** Let the fiber schedule */
- fiber_wakeup(fiber());
- fiber_yield();
+ fiber_reschedule();
note("by this time the fiber should be dead already");
fiber_cancel(fiber);
fiber_join(fiber);
@@ -201,12 +200,42 @@ fiber_name_test()
footer();
}
+static void
+fiber_wakeup_self_test()
+{
+ header();
+
+ struct fiber *f = fiber();
+
+ fiber_wakeup(f);
+ double duration = 0.001;
+ uint64_t t1 = fiber_clock64();
+ fiber_sleep(duration);
+ uint64_t t2 = fiber_clock64();
+ /*
+ * It was a real sleep, not 0 duration. Wakeup is nop on the running
+ * fiber.
+ */
+ assert(t2 - t1 >= duration);
+
+ /*
+ * Wakeup + start of a new fiber. This is different from yield but
+ * works without crashes too.
+ */
+ struct fiber *newf = fiber_new_xc("nop", noop_f);
+ fiber_wakeup(f);
+ fiber_start(newf);
+
+ footer();
+}
+
static int
main_f(va_list ap)
{
fiber_name_test();
fiber_join_test();
fiber_stack_test();
+ fiber_wakeup_self_test();
ev_break(loop(), EVBREAK_ALL);
return 0;
}
diff --git a/test/unit/fiber.result b/test/unit/fiber.result
index a61e0a2b8..36bf2a599 100644
--- a/test/unit/fiber.result
+++ b/test/unit/fiber.result
@@ -17,3 +17,5 @@ SystemError Failed to allocate 42 bytes in allocator for exception: Cannot alloc
# normal-stack fiber not crashed
# big-stack fiber not crashed
*** fiber_stack_test: done ***
+ *** fiber_wakeup_self_test ***
+ *** fiber_wakeup_self_test: done ***
diff --git a/test/unit/fiber_stress.cc b/test/unit/fiber_stress.cc
index 44ab12487..aa892d0a0 100644
--- a/test/unit/fiber_stress.cc
+++ b/test/unit/fiber_stress.cc
@@ -9,10 +9,8 @@ enum {
static int
yield_f(va_list ap)
{
- for (int i = 0; i < ITERATIONS; i++) {
- fiber_wakeup(fiber());
- fiber_yield();
- }
+ for (int i = 0; i < ITERATIONS; i++)
+ fiber_reschedule();
return 0;
}
--
2.24.3 (Apple Git-128)
More information about the Tarantool-patches
mailing list