[Tarantool-patches] [PATCH 1/2] fiber: introduce fiber_touch and fiber_continue

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Fri Apr 23 02:05:54 MSK 2021


These new functions are supposed to be used instead of
fiber_wakeup whose behaviour sometimes leads to unexpected and
hard to catch bugs and to crashes when called on the self fiber,
usually accidentally.

fiber_touch() allows to wakeup any fiber safely, even self.
Because on self it turns into NOP, but it is not any slower than
fiber_wakeup(), because uses the same code, but with a bit
different flag set.

fiber_continue() is like fiber_wakeup() but it has an assertion
that it is not called on self allowing to catch self-wakeup bugs
early and easy.

fiber_wakeup() still exists because it is a part of the public
API.

In order to make fiber_touch() as cheap as fiber_wakeup() there is
a new flag FIBER_IS_RUNNING. It is set for the fiber stored in the
cord - the currently running one. The only difference between the
wakeup and touch is wakeup skips READY | DEAD fibers, while the
touch skips READY | DEAD | RUNNING. Both cost one bitwise OR
operation.

Part of #5292
---
 src/lib/core/fiber.c   | 107 ++++++++++++++++++++++++-----------------
 src/lib/core/fiber.h   |  26 ++++++++++
 test/unit/fiber.cc     |  64 ++++++++++++++++++++++++
 test/unit/fiber.result |   4 ++
 4 files changed, 158 insertions(+), 43 deletions(-)

diff --git a/src/lib/core/fiber.c b/src/lib/core/fiber.c
index f8b85d99d..96de83042 100644
--- a/src/lib/core/fiber.c
+++ b/src/lib/core/fiber.c
@@ -396,10 +396,13 @@ fiber_call_impl(struct fiber *callee)
 	assert(rlist_empty(&callee->state));
 	assert(caller);
 	assert(caller != callee);
+	assert((caller->flags & FIBER_IS_RUNNING) != 0);
+	assert((callee->flags & FIBER_IS_RUNNING) == 0);
 
+	caller->flags &= ~FIBER_IS_RUNNING;
 	cord->fiber = callee;
+	callee->flags = (callee->flags & ~FIBER_IS_READY) | FIBER_IS_RUNNING;
 
-	callee->flags &= ~FIBER_IS_READY;
 	ASAN_START_SWITCH_FIBER(asan_state, 1,
 				callee->stack,
 				callee->stack_size);
@@ -452,33 +455,10 @@ fiber_checkstack(void)
  * call, it simply reschedules the fiber after other ready
  * fibers in the same event loop iteration.
  */
-void
-fiber_wakeup(struct fiber *f)
+static void
+fiber_make_ready(struct fiber *f)
 {
-	assert(! (f->flags & FIBER_IS_DEAD));
-	/**
-	 * Do nothing if the fiber is already in cord->ready
-	 * list *or* is in the call chain created by
-	 * fiber_schedule_list(). While it's harmless to re-add
-	 * a fiber to cord->ready, even if it's already there,
-	 * but the same game is deadly when the fiber is in
-	 * the callee list created by fiber_schedule_list().
-	 *
-	 * To put it another way, fiber_wakeup() is a 'request' to
-	 * schedule the fiber for execution, and once it is executing
-	 * a wakeup request is considered complete and it must be
-	 * removed.
-	 *
-	 * A dead fiber can be lingering in the cord fiber list
-	 * if it is joinable. This makes it technically possible
-	 * to schedule it. We would never make such a mistake
-	 * in our own code, hence the assert above. But as long
-	 * as fiber.wakeup() is a part of public Lua API, an
-	 * external rock can mess things up. Ignore such attempts
-	 * as well.
-	 */
-	if (f->flags & (FIBER_IS_READY | FIBER_IS_DEAD))
-		return;
+	assert((f->flags & (FIBER_IS_DEAD | FIBER_IS_READY)) == 0);
 	struct cord *cord = cord();
 	if (rlist_empty(&cord->ready)) {
 		/*
@@ -503,6 +483,45 @@ fiber_wakeup(struct fiber *f)
 	f->flags |= FIBER_IS_READY;
 }
 
+void
+fiber_wakeup(struct fiber *f)
+{
+	assert((f->flags & FIBER_IS_DEAD) == 0);
+	/*
+	 * Do nothing if the fiber is already in cord->ready list *or* is in the
+	 * call chain created by fiber_schedule_list(). While it's harmless to
+	 * re-add a fiber to cord->ready, even if it's already there, but the
+	 * same game is deadly when the fiber is in the callee list created by
+	 * fiber_schedule_list().
+	 *
+	 * To put it another way, fiber_wakeup() is a 'request' to schedule the
+	 * fiber for execution, and once it is executing a wakeup request is
+	 * considered complete and it must be removed.
+	 *
+	 * A dead fiber can be lingering in the cord fiber list if it is
+	 * joinable. This makes it technically possible to schedule it. We would
+	 * never make such a mistake in our own code, hence the assert above.
+	 * But as long as fiber.wakeup() is a part of public Lua API, an
+	 * external rock can mess things up. Ignore such attempts
+	 * as well.
+	 */
+	if ((f->flags & (FIBER_IS_READY | FIBER_IS_DEAD)) == 0)
+		fiber_make_ready(f);
+}
+
+void
+fiber_touch(struct fiber *f)
+{
+	assert((f->flags & FIBER_IS_DEAD) == 0);
+	/*
+	 * The reason why DEAD is checked and asserted at the same time see in
+	 * fiber_wakeup().
+	 */
+	if ((f->flags & (FIBER_IS_READY | FIBER_IS_DEAD |
+	    FIBER_IS_RUNNING)) == 0)
+		fiber_make_ready(f);
+}
+
 /** Cancel the subject fiber.
 *
  * Note: cancelation is asynchronous. Use fiber_join() to wait for the
@@ -521,7 +540,6 @@ void
 fiber_cancel(struct fiber *f)
 {
 	assert(f->fid != 0);
-	struct fiber *self = fiber();
 	/**
 	 * Do nothing if the fiber is dead, since cancelling
 	 * the fiber would clear the diagnostics area and
@@ -535,10 +553,8 @@ fiber_cancel(struct fiber *f)
 	/**
 	 * Don't wake self and zombies.
 	 */
-	if (f != self) {
-		if (f->flags & FIBER_IS_CANCELLABLE)
-			fiber_wakeup(f);
-	}
+	if ((f->flags & FIBER_IS_CANCELLABLE) != 0)
+		fiber_touch(f);
 }
 
 /**
@@ -686,8 +702,13 @@ fiber_yield(void)
 
 	assert(callee->flags & FIBER_IS_READY || callee == &cord->sched);
 	assert(! (callee->flags & FIBER_IS_DEAD));
+	assert((caller->flags & FIBER_IS_RUNNING) != 0);
+	assert((callee->flags & FIBER_IS_RUNNING) == 0);
+
+	caller->flags &= ~FIBER_IS_RUNNING;
 	cord->fiber = callee;
-	callee->flags &= ~FIBER_IS_READY;
+	callee->flags = (callee->flags & ~FIBER_IS_READY) | FIBER_IS_RUNNING;
+
 	ASAN_START_SWITCH_FIBER(asan_state,
 				(caller->flags & FIBER_IS_DEAD) == 0,
 				callee->stack,
@@ -712,7 +733,7 @@ fiber_schedule_timeout(ev_loop *loop,
 	struct fiber_watcher_data *state =
 			(struct fiber_watcher_data *) watcher->data;
 	state->timed_out = true;
-	fiber_wakeup(state->f);
+	fiber_continue(state->f);
 }
 
 /**
@@ -748,10 +769,6 @@ fiber_sleep(double delay)
 	if (delay == 0) {
 		ev_idle_start(loop(), &cord()->idle_event);
 	}
-	/*
-	 * We don't use fiber_wakeup() here to ensure there is
-	 * no infinite wakeup loop in case of fiber_sleep(0).
-	 */
 	fiber_yield_timeout(delay);
 
 	if (delay == 0) {
@@ -766,7 +783,7 @@ fiber_schedule_cb(ev_loop *loop, ev_watcher *watcher, int revents)
 	(void) revents;
 	struct fiber *fiber = watcher->data;
 	assert(fiber() == &cord()->sched);
-	fiber_wakeup(fiber);
+	fiber_continue(fiber);
 }
 
 static inline void
@@ -864,7 +881,11 @@ fiber_reset(struct fiber *fiber)
 {
 	rlist_create(&fiber->on_yield);
 	rlist_create(&fiber->on_stop);
-	fiber->flags = FIBER_DEFAULT_FLAGS;
+	/*
+	 * Preserve the running flag if set. Reset might be called on the
+	 * current fiber when it is recycled.
+	 */
+	fiber->flags = FIBER_DEFAULT_FLAGS | (fiber->flags & FIBER_IS_RUNNING);
 #if ENABLE_FIBER_TOP
 	clock_stat_reset(&fiber->clock_stat);
 #endif /* ENABLE_FIBER_TOP */
@@ -929,8 +950,7 @@ fiber_loop(MAYBE_UNUSED void *data)
 		       struct fiber *f;
 		       f = rlist_shift_entry(&fiber->wake, struct fiber,
 					     state);
-		       assert(f != fiber);
-		       fiber_wakeup(f);
+		       fiber_continue(f);
 	        }
 		fiber_on_stop(fiber);
 		/* reset pending wakeups */
@@ -1449,6 +1469,7 @@ cord_create(struct cord *cord, const char *name)
 	cord->sched.name = NULL;
 	fiber_set_name(&cord->sched, "sched");
 	cord->fiber = &cord->sched;
+	cord->sched.flags |= FIBER_IS_RUNNING;
 
 	cord->max_fid = FIBER_ID_MAX_RESERVED;
 	/*
@@ -1632,7 +1653,7 @@ cord_cojoin_wakeup(struct ev_loop *loop, struct ev_async *ev, int revents)
 	struct cord_cojoin_ctx *ctx = (struct cord_cojoin_ctx *)ev->data;
 
 	ctx->task_complete = true;
-	fiber_wakeup(ctx->fiber);
+	fiber_continue(ctx->fiber);
 }
 
 int
diff --git a/src/lib/core/fiber.h b/src/lib/core/fiber.h
index b9eeef697..2506a178c 100644
--- a/src/lib/core/fiber.h
+++ b/src/lib/core/fiber.h
@@ -162,6 +162,10 @@ enum {
 	 * This flag is set when fiber uses custom stack size.
 	 */
 	FIBER_CUSTOM_STACK	= 1 << 5,
+	/**
+	 * The flag is set for the fiber currently being executed by the cord.
+	 */
+	FIBER_IS_RUNNING	= 1 << 6,
 	FIBER_DEFAULT_FLAGS = FIBER_IS_CANCELLABLE
 };
 
@@ -820,6 +824,28 @@ fiber_destroy_all(struct cord *cord);
 void
 fiber_gc(void);
 
+/**
+ * A polite version of wakeup. The same as fiber_wakeup() but is nop for the
+ * currently running fiber. Allows to save one 'if' compared to wakeup by not
+ * forcing to check if the fiber is the current one.
+ */
+void
+fiber_touch(struct fiber *f);
+
+/**
+ * The same as fiber_wakeup() but bans continuation of the current fiber as it
+ * might lead to very unexpected behaviour. For instance, some code called
+ * wakeup on self accidentally, and much later in some other subsystem is called
+ * fiber_yield() to wait for something - it would return right away without
+ * waiting causing troubles of all kinds.
+ */
+static inline void
+fiber_continue(struct fiber *f)
+{
+	assert(f != fiber());
+	fiber_wakeup(f);
+}
+
 void
 fiber_call(struct fiber *callee);
 
diff --git a/test/unit/fiber.cc b/test/unit/fiber.cc
index 9c1a23bdd..65c083230 100644
--- a/test/unit/fiber.cc
+++ b/test/unit/fiber.cc
@@ -201,12 +201,76 @@ fiber_name_test()
 	footer();
 }
 
+static int
+yield_f(va_list ap)
+{
+	bool *is_started = va_arg(ap, bool *);
+	*is_started = true;
+	fiber_yield();
+	return 0;
+}
+
+static void
+fiber_touch_test()
+{
+	header();
+
+	struct fiber *f = fiber();
+
+	fiber_touch(f);
+	double duration = 0.001;
+	uint64_t t1 = fiber_clock64();
+	fiber_sleep(duration);
+	uint64_t t2 = fiber_clock64();
+	/*
+	 * It was a real sleep, not immediate wakeup. Touch is nop on a running
+	 * fiber.
+	 */
+	assert(t2 - t1 >= duration);
+
+	bool is_started = false;
+	f = fiber_new_xc("yield", yield_f);
+	fiber_set_joinable(f, true);
+	fiber_start(f, &is_started);
+	/* Let the infinitely yielding fiber to start its yield. */
+	fiber_sleep(0);
+	assert(is_started);
+	fiber_touch(f);
+	int rc = fiber_join(f);
+	assert(rc == 0);
+
+	footer();
+}
+
+static void
+fiber_continue_test()
+{
+	header();
+
+	struct fiber *f = fiber();
+
+	bool is_started = false;
+	f = fiber_new_xc("yield", yield_f);
+	fiber_set_joinable(f, true);
+	fiber_start(f, &is_started);
+	/* Let the infinitely yielding fiber to start its yield. */
+	fiber_sleep(0);
+	assert(is_started);
+	fiber_continue(f);
+	int rc = fiber_join(f);
+	assert(rc == 0);
+
+	footer();
+}
+
 static int
 main_f(va_list ap)
 {
 	fiber_name_test();
 	fiber_join_test();
 	fiber_stack_test();
+	fiber_touch_test();
+	fiber_continue_test();
 	ev_break(loop(), EVBREAK_ALL);
 	return 0;
 }
diff --git a/test/unit/fiber.result b/test/unit/fiber.result
index a61e0a2b8..23f331e9b 100644
--- a/test/unit/fiber.result
+++ b/test/unit/fiber.result
@@ -17,3 +17,7 @@ SystemError Failed to allocate 42 bytes in allocator for exception: Cannot alloc
 # normal-stack fiber not crashed
 # big-stack fiber not crashed
 	*** fiber_stack_test: done ***
+	*** fiber_touch_test ***
+	*** fiber_touch_test: done ***
+	*** fiber_continue_test ***
+	*** fiber_continue_test: done ***
-- 
2.24.3 (Apple Git-128)



More information about the Tarantool-patches mailing list