[tarantool-patches] Re: [PATCH 3/5] swim: allow to hang triggers on member updates

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu Jun 6 00:53:56 MSK 2019


On 05/06/2019 10:11, Konstantin Osipov wrote:
> * Vladislav Shpilevoy <v.shpilevoy at tarantool.org> [19/06/03 14:33]:
> 
> I like the design: elegant and lean. Some comments below.
> 

Thanks.

>> This commit allows to hang triggers on member updates.
> 
> "hang triggers" sounds ominous. I think you simply
> meant "set triggers". See also patch subject, please change as
> well.
> 

Yes, 'set' is the right word. Updated in all places.

>>  	struct rlist in_dissemination_queue;
>> +	/**
>> +	 * Each time a member is updated, or created, or dropped,
>> +	 * it is added to an update queue. Members from this queue
>> +	 * are dispatched into user defined triggers.
>> +	 */
>> +	struct stailq_entry in_update_queue;
> 
> I think the name is a bit confusing. After all, the dissemination
> queue is also an update queue. in_local_queue? in_trigger_queue?
> in_event_queue? in_pending_member_update_queue? 
> in_member_update? in_member_update_queue?

Indeed, 'update' is not the best name. I guess, 'event_queue'
is better - it is consistent with 'events' field in
struct swim_member. Besides, when a member is dropped, technically
we can't call it 'update'.

But then we should rename everything, including
'swim_trigger_list_on_member_update', and Lua part
'swim:on_member_update'.

They will be 'swim_trigger_list_on_member_event',
'swim:on_member_event' etc.

=======================================================================

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 8507eea14..d48e43761 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -337,10 +337,10 @@ struct swim_member {
 	struct rlist in_dissemination_queue;
 	/**
 	 * Each time a member is updated, or created, or dropped,
-	 * it is added to an update queue. Members from this queue
+	 * it is added to an event queue. Members from this queue
 	 * are dispatched into user defined triggers.
 	 */
-	struct stailq_entry in_update_queue;
+	struct stailq_entry in_event_queue;
 	/**
 	 * Mask of events happened with this member since a
 	 * previous trigger invocation. Once the events are
@@ -474,12 +474,12 @@ struct swim {
 	 * the events to triggers. Dropped members are also kept
 	 * here until they are handled by a trigger.
 	 */
-	struct stailq update_queue;
+	struct stailq event_queue;
 	/**
 	 * List of triggers to call on each new, dropped, and
 	 * updated member.
 	 */
-	struct rlist on_member_update;
+	struct rlist on_member_event;
 	/**
 	 * Members to which a message should be sent next during
 	 * this round.
@@ -498,10 +498,10 @@ struct swim {
 	 */
 	struct swim_member **shuffled;
 	/**
-	 * Worker fiber to serve member update triggers. This task
+	 * Worker fiber to serve member event triggers. This task
 	 * is being done in a separate fiber, because user
 	 * triggers can yield and libev callbacks, processing
-	 * member updates, are not allowed to yield.
+	 * member events, are not allowed to yield.
 	 */
 	struct fiber *worker;
 	/**
@@ -587,10 +587,10 @@ swim_on_member_update(struct swim *swim, struct swim_member *member,
 	member->unacknowledged_pings = 0;
 	swim_register_event(swim, member);
 	/*
-	 * Member update should be delivered to triggers only
-	 * if there is at least one trigger.
+	 * Member event should be delivered to triggers only if
+	 * there is at least one trigger.
 	 */
-	if (! rlist_empty(&swim->on_member_update)) {
+	if (! rlist_empty(&swim->on_member_event)) {
 		/*
 		 * Member is referenced and added to a queue only
 		 * once. That moment can be detected when a first
@@ -598,8 +598,8 @@ swim_on_member_update(struct swim *swim, struct swim_member *member,
 		 */
 		if (member->events == 0 && events != 0) {
 			swim_member_ref(member);
-			stailq_add_tail_entry(&swim->update_queue, member,
-					      in_update_queue);
+			stailq_add_tail_entry(&swim->event_queue, member,
+					      in_event_queue);
 			fiber_wakeup(swim->worker);
 		}
 		member->events |= events;
@@ -607,15 +607,15 @@ swim_on_member_update(struct swim *swim, struct swim_member *member,
 }
 
 struct rlist *
-swim_trigger_list_on_member_update(struct swim *swim)
+swim_trigger_list_on_member_event(struct swim *swim)
 {
-	return &swim->on_member_update;
+	return &swim->on_member_event;
 }
 
 bool
 swim_has_pending_events(struct swim *swim)
 {
-	return ! stailq_empty(&swim->update_queue);
+	return ! stailq_empty(&swim->event_queue);
 }
 
 /**
@@ -1744,7 +1744,7 @@ swim_do_delete(struct swim *swim);
 
 /**
  * Worker fiber. At this moment its only task is dispatching
- * member updates to user defined triggers. Generally, because
+ * member events to user defined triggers. Generally, because
  * SWIM is fully IO driven, that fiber should be used only for
  * yielding tasks not related to SWIM core logic. For all the
  * other tasks libev callbacks are ok. Unfortunately, yields are
@@ -1755,22 +1755,22 @@ static int
 swim_worker_f(va_list va)
 {
 	struct swim *s = va_arg(va, struct swim *);
-	struct swim_on_member_update_ctx ctx;
+	struct swim_on_member_event_ctx ctx;
 	while (! fiber_is_cancelled()) {
-		while (stailq_empty(&s->update_queue)) {
+		while (stailq_empty(&s->event_queue)) {
 			fiber_yield();
 			if (fiber_is_cancelled())
 				goto end;
 		}
 		/*
 		 * Can't be empty. SWIM deletes members from
-		 * update queue only on SWIM deletion, but then
+		 * event queue only on SWIM deletion, but then
 		 * the fiber would be stopped already.
 		 */
-		assert(! stailq_empty(&s->update_queue));
+		assert(! stailq_empty(&s->event_queue));
 		struct swim_member *m =
-			stailq_shift_entry(&s->update_queue, struct swim_member,
-					   in_update_queue);
+			stailq_shift_entry(&s->event_queue, struct swim_member,
+					   in_event_queue);
 		/*
 		 * It is possible, that a member was added and
 		 * removed before firing a trigger. It happens,
@@ -1783,7 +1783,7 @@ swim_worker_f(va_list va)
 			ctx.member = m;
 			ctx.events = m->events;
 			m->events = 0;
-			if (trigger_run(&s->on_member_update, (void *) &ctx))
+			if (trigger_run(&s->on_member_event, (void *) &ctx))
 				diag_log();
 		}
 		swim_member_unref(m);
@@ -1832,8 +1832,8 @@ swim_new(void)
 
 	/* Dissemination component. */
 	rlist_create(&swim->dissemination_queue);
-	rlist_create(&swim->on_member_update);
-	stailq_create(&swim->update_queue);
+	rlist_create(&swim->on_member_event);
+	stailq_create(&swim->event_queue);
 
 	fiber_start(swim->worker, swim);
 	return swim;
@@ -2096,7 +2096,7 @@ swim_do_delete(struct swim *swim)
 	swim_ev_timer_stop(l, &swim->round_tick);
 	swim_ev_timer_stop(l, &swim->wait_ack_tick);
 	struct swim_member *m, *tmp;
-	stailq_foreach_entry_safe(m, tmp, &swim->update_queue, in_update_queue)
+	stailq_foreach_entry_safe(m, tmp, &swim->event_queue, in_event_queue)
 		swim_member_unref(m);
 	mh_int_t node;
 	mh_foreach(swim->members, node) {
@@ -2114,7 +2114,7 @@ swim_do_delete(struct swim *swim)
 	swim_task_destroy(&swim->round_step_task);
 	wait_ack_heap_destroy(&swim->wait_ack_heap);
 	mh_swim_table_delete(swim->members);
-	trigger_destroy(&swim->on_member_update);
+	trigger_destroy(&swim->on_member_event);
 	free(swim->shuffled);
 	free(swim);
 }
diff --git a/src/lib/swim/swim.h b/src/lib/swim/swim.h
index 8a5bc9522..a42ace7c6 100644
--- a/src/lib/swim/swim.h
+++ b/src/lib/swim/swim.h
@@ -286,8 +286,8 @@ enum swim_ev_mask {
 	SWIM_EV_DROP            = 0b00100000,
 };
 
-/** On member update trigger context. */
-struct swim_on_member_update_ctx {
+/** On member event trigger context. */
+struct swim_on_member_event_ctx {
 	/** New, dropped, or updated member. */
 	struct swim_member *member;
 	/** Mask of happened events. */
@@ -295,22 +295,22 @@ struct swim_on_member_update_ctx {
 };
 
 /**
- * A list of member update triggers. A couple of words about such
- * a strange API, instead of something like "add_trigger",
- * "drop_trigger". A main motivation is that some functions need
- * a whole trigger list. For example, a function adding Lua
- * functions as triggers. At the time of this writing it was
- * lbox_trigger_reset. There is a convention, that any Tarantool
- * trigger exposed to Lua should provide a way to add one, drop
- * one, replace one, return all - lbox_trigger_reset does all of
- * this.
+ * A list of member event processing triggers. A couple of words
+ * about such a strange API, instead of something like
+ * "add_trigger", "drop_trigger". A main motivation is that some
+ * functions need a whole trigger list. For example, a function
+ * adding Lua functions as triggers. At the time of this writing
+ * it was lbox_trigger_reset. There is a convention, that any
+ * Tarantool trigger exposed to Lua should provide a way to add
+ * one, drop one, replace one, return all - lbox_trigger_reset
+ * does all of this.
  */
 struct rlist *
-swim_trigger_list_on_member_update(struct swim *swim);
+swim_trigger_list_on_member_event(struct swim *swim);
 
 /**
- * Check if a SWIM instance has pending update events. Is not a
- * public one, used by tests.
+ * Check if a SWIM instance has pending events. Is not a public
+ * one, used by tests.
  */
 bool
 swim_has_pending_events(struct swim *swim);
diff --git a/test/unit/swim.c b/test/unit/swim.c
index 3f2d156c5..8bf9b0eba 100644
--- a/test/unit/swim.c
+++ b/test/unit/swim.c
@@ -956,22 +956,22 @@ struct trigger_ctx {
 	bool is_deleted;
 	bool need_sleep;
 	struct fiber *f;
-	struct swim_on_member_update_ctx ctx;
+	struct swim_on_member_event_ctx ctx;
 };
 
 static void
-swim_on_member_update_save_event(struct trigger *t, void *event)
+swim_on_member_event_save(struct trigger *t, void *event)
 {
 	struct trigger_ctx *c = (struct trigger_ctx *) t->data;
 	++c->counter;
 	if (c->ctx.member != NULL)
 		swim_member_unref(c->ctx.member);
-	c->ctx = *((struct swim_on_member_update_ctx *) event);
+	c->ctx = *((struct swim_on_member_event_ctx *) event);
 	swim_member_ref(c->ctx.member);
 }
 
 static void
-swim_on_member_update_yield(struct trigger *t, void *event)
+swim_on_member_event_yield(struct trigger *t, void *event)
 {
 	struct trigger_ctx *c = (struct trigger_ctx *) t->data;
 	++c->counter;
@@ -997,14 +997,14 @@ swim_test_triggers(void)
 	memset(&tctx2, 0, sizeof(tctx2));
 	struct trigger *t1 = (struct trigger *) malloc(sizeof(*t1));
 	assert(t1 != NULL);
-	trigger_create(t1, swim_on_member_update_save_event, (void *) &tctx,
+	trigger_create(t1, swim_on_member_event_save, (void *) &tctx,
 		       swim_trigger_destroy_cb);
 
 	/* Skip 'new self' events. */
 	swim_cluster_run_triggers(cluster);
 
 	struct swim *s1 = swim_cluster_member(cluster, 0);
-	trigger_add(swim_trigger_list_on_member_update(s1), t1);
+	trigger_add(swim_trigger_list_on_member_event(s1), t1);
 	swim_cluster_interconnect(cluster, 0, 1);
 	swim_cluster_run_triggers(cluster);
 
@@ -1052,14 +1052,14 @@ swim_test_triggers(void)
 	 * There is a complication about yields. If a trigger
 	 * yields, other triggers wait for its finish. And all
 	 * the triggers should be ready to SWIM deletion in the
-	 * middle of an update processing. SWIM object should not
+	 * middle of an event processing. SWIM object should not
 	 * be deleted, until all the triggers are done.
 	 */
 	struct trigger *t2 = (struct trigger *) malloc(sizeof(*t2));
 	assert(t2 != NULL);
 	tctx2.need_sleep = true;
-	trigger_create(t2, swim_on_member_update_yield, (void *) &tctx2, NULL);
-	trigger_add(swim_trigger_list_on_member_update(s1), t2);
+	trigger_create(t2, swim_on_member_event_yield, (void *) &tctx2, NULL);
+	trigger_add(swim_trigger_list_on_member_event(s1), t2);
 	swim_cluster_add_link(cluster, 0, 1);
 	swim_cluster_run_triggers(cluster);
 	is(tctx2.counter, 1, "yielding trigger is fired");
diff --git a/test/unit/swim_test_utils.c b/test/unit/swim_test_utils.c
index 0b4743b2d..463c62390 100644
--- a/test/unit/swim_test_utils.c
+++ b/test/unit/swim_test_utils.c
@@ -200,8 +200,8 @@ void
 swim_test_event_cb(struct trigger *trigger, void *event)
 {
 	(void) trigger;
-	struct swim_on_member_update_ctx *ctx =
-		(struct swim_on_member_update_ctx *) event;
+	struct swim_on_member_event_ctx *ctx =
+		(struct swim_on_member_event_ctx *) event;
 	assert(ctx->events != 0);
 	assert((ctx->events & SWIM_EV_NEW) == 0 ||
 	       (ctx->events & SWIM_EV_DROP) == 0);
@@ -216,7 +216,7 @@ swim_node_create(struct swim_node *n, int id)
 	assert(n->swim != NULL);
 	struct trigger *t = (struct trigger *) malloc(sizeof(*t));
 	trigger_create(t, swim_test_event_cb, NULL, (trigger_f0) free);
-	trigger_add(swim_trigger_list_on_member_update(n->swim), t);
+	trigger_add(swim_trigger_list_on_member_event(n->swim), t);
 
 	char uri[128];
 	swim_cluster_id_to_uri(uri, id);

=======================================================================

>> +	/**
>> +	 * Worker fiber to serve member update triggers. This task
>> +	 * is being done in a separate fiber, because user
>> +	 * triggers can yield and libev callbacks, processing
>> +	 * member updates, are not allowed to yield.
>> +	 */
>> +	struct fiber *worker;
> 
> I think as long as the worker is only used for triggers, the name
> should be more specific. when the worker is used for other events
> or actions, it could be renamed.

Perhaps.

=======================================================================

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index d48e43761..fbc3cc7be 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -498,12 +498,12 @@ struct swim {
 	 */
 	struct swim_member **shuffled;
 	/**
-	 * Worker fiber to serve member event triggers. This task
-	 * is being done in a separate fiber, because user
-	 * triggers can yield and libev callbacks, processing
-	 * member events, are not allowed to yield.
+	 * Fiber to serve member event triggers. This task is
+	 * being done in a separate fiber, because user triggers
+	 * can yield and libev callbacks, processing member
+	 * events, are not allowed to yield.
 	 */
-	struct fiber *worker;
+	struct fiber *event_handler;
 	/**
 	 * Single round step task. It is impossible to have
 	 * multiple round steps in the same SWIM instance at the
@@ -600,7 +600,7 @@ swim_on_member_update(struct swim *swim, struct swim_member *member,
 			swim_member_ref(member);
 			stailq_add_tail_entry(&swim->event_queue, member,
 					      in_event_queue);
-			fiber_wakeup(swim->worker);
+			fiber_wakeup(swim->event_handler);
 		}
 		member->events |= events;
 	}
@@ -1736,14 +1736,14 @@ error:
 
 /**
  * Do actual deletion of a SWIM instance. This destructor is
- * called only after worker fiber has been stopped, and triggers
- * do not work.
+ * called only after event handler fiber has been stopped, and
+ * triggers do not work.
  */
 static void
 swim_do_delete(struct swim *swim);
 
 /**
- * Worker fiber. At this moment its only task is dispatching
+ * Event handler. At this moment its only task is dispatching
  * member events to user defined triggers. Generally, because
  * SWIM is fully IO driven, that fiber should be used only for
  * yielding tasks not related to SWIM core logic. For all the
@@ -1752,7 +1752,7 @@ swim_do_delete(struct swim *swim);
  * invoked by a cord scheduler fiber prohibited for manual yields.
  */
 static int
-swim_worker_f(va_list va)
+swim_event_handler_f(va_list va)
 {
 	struct swim *s = va_arg(va, struct swim *);
 	struct swim_on_member_event_ctx ctx;
@@ -1802,14 +1802,15 @@ swim_new(void)
 		diag_set(OutOfMemory, sizeof(*swim), "calloc", "swim");
 		return NULL;
 	}
-	swim->worker = fiber_new("SWIM worker", swim_worker_f);
-	if (swim->worker == NULL) {
+	swim->event_handler = fiber_new("SWIM event handler",
+					swim_event_handler_f);
+	if (swim->event_handler == NULL) {
 		free(swim);
 		return NULL;
 	}
 	swim->members = mh_swim_table_new();
 	if (swim->members == NULL) {
-		fiber_cancel(swim->worker);
+		fiber_cancel(swim->event_handler);
 		free(swim);
 		diag_set(OutOfMemory, sizeof(*swim->members),
 			 "mh_swim_table_new", "members");
@@ -1835,7 +1836,7 @@ swim_new(void)
 	rlist_create(&swim->on_member_event);
 	stailq_create(&swim->event_queue);
 
-	fiber_start(swim->worker, swim);
+	fiber_start(swim->event_handler, swim);
 	return swim;
 }
 
@@ -1944,8 +1945,9 @@ swim_cfg(struct swim *swim, const char *uri, double heartbeat_rate,
 		 * specified.
 		 */
 		addr = swim->scheduler.transport.addr;
-		fiber_set_name(swim->worker, tt_sprintf("SWIM worker %d",
-							swim_fd(swim)));
+		fiber_set_name(swim->event_handler,
+			       tt_sprintf("SWIM event handler %d",
+					  swim_fd(swim)));
 	} else {
 		addr = swim->self->addr;
 	}
@@ -2122,8 +2124,8 @@ swim_do_delete(struct swim *swim)
 void
 swim_delete(struct swim *swim)
 {
-	fiber_wakeup(swim->worker);
-	fiber_cancel(swim->worker);
+	fiber_wakeup(swim->event_handler);
+	fiber_cancel(swim->event_handler);
 	fiber_sleep(0);
 }
 
@@ -2139,8 +2141,8 @@ swim_quit_step_complete(struct swim_task *task,
 	(void) rc;
 	struct swim *swim = swim_by_scheduler(scheduler);
 	if (rlist_empty(&swim->round_queue)) {
-		fiber_wakeup(swim->worker);
-		fiber_cancel(swim->worker);
+		fiber_wakeup(swim->event_handler);
+		fiber_cancel(swim->event_handler);
 		return;
 	}
 	struct swim_member *m =

=======================================================================

> 
>> @@ -734,7 +801,6 @@ swim_delete_member(struct swim *swim, struct swim_member *member)
>>  	mh_int_t rc = mh_swim_table_find(swim->members, key, NULL);
>>  	assert(rc != mh_end(swim->members));
>>  	mh_swim_table_del(swim->members, rc, NULL);
>> -	swim_cached_round_msg_invalidate(swim);
>>  	rlist_del_entry(member, in_round_queue);
> 
> Stray change? At least I don't understand it.

No, it is not stray. I've started calling swim_on_member_update()
in swim_delete_member(). The former calls swim_cached_round_msg_invalidate
too, so I decided to remove one from the latter.

> 
>> +	struct swim *s = va_arg(va, struct swim *);
>> +	struct swim_on_member_update_ctx ctx;
>> +	while (! fiber_is_cancelled()) {
>> +		while (stailq_empty(&s->update_queue)) {
>> +			fiber_yield();
>> +			if (fiber_is_cancelled())
>> +				goto end;
> 
>> +		}
> 
> Since you use goto anyway, why a double loop?
> 
>     while (! fiber_is_cancelled()) {
>         if (stailq_empty(&s->update_queue) {
>             fiber_yield();
>             continue;
>         }
>         ... 
>     }

You are right. I forgot about 'continue', shame on me.

=======================================================================

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index fbc3cc7be..17e1e6a7b 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -1757,10 +1757,9 @@ swim_event_handler_f(va_list va)
 	struct swim *s = va_arg(va, struct swim *);
 	struct swim_on_member_event_ctx ctx;
 	while (! fiber_is_cancelled()) {
-		while (stailq_empty(&s->event_queue)) {
+		if (stailq_empty(&s->event_queue)) {
 			fiber_yield();
-			if (fiber_is_cancelled())
-				goto end;
+			continue;
 		}
 		/*
 		 * Can't be empty. SWIM deletes members from
@@ -1788,7 +1787,6 @@ swim_event_handler_f(va_list va)
 		}
 		swim_member_unref(m);
 	}
-end:
 	swim_do_delete(s);
 	return 0;
 }

=======================================================================

>> +	}
>> +end:
>> +	swim_do_delete(s);
> 
> I don't mind swim_do_delete(), but I think the worker should not
> be destorying the instance. Looks a breach of encapsulation, this assumes
> the workers knows about all other uses of struct swim.
> 
> Why not cancel and join the worker from the destructor and then
> safely do the delete?

Yes, this is a breach. My motivation why I did this after multiple
iterations of patch remake and lots of doubts was that I wanted
swim:delete() be fast. With fiber_join() it will wait while all the
triggers are finished. But probably I magnified distress as usual,
and fiber_join would be really much simpler here.

=======================================================================

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index 17e1e6a7b..6f34faa71 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -1734,14 +1734,6 @@ error:
 	diag_log();
 }
 
-/**
- * Do actual deletion of a SWIM instance. This destructor is
- * called only after event handler fiber has been stopped, and
- * triggers do not work.
- */
-static void
-swim_do_delete(struct swim *swim);
-
 /**
  * Event handler. At this moment its only task is dispatching
  * member events to user defined triggers. Generally, because
@@ -1787,7 +1779,6 @@ swim_event_handler_f(va_list va)
 		}
 		swim_member_unref(m);
 	}
-	swim_do_delete(s);
 	return 0;
 }
 
@@ -1806,6 +1797,7 @@ swim_new(void)
 		free(swim);
 		return NULL;
 	}
+	fiber_set_joinable(swim->event_handler, true);
 	swim->members = mh_swim_table_new();
 	if (swim->members == NULL) {
 		fiber_cancel(swim->event_handler);
@@ -2088,9 +2080,34 @@ swim_size(const struct swim *swim)
 	return mh_size(swim->members);
 }
 
-static void
-swim_do_delete(struct swim *swim)
+/**
+ * Cancel and wait finish of an event handler fiber. That
+ * operation is not inlined in the SWIM destructor, because there
+ * is one more place, when the handler should be stopped even
+ * before SWIM deletion - quit. Quit deletes the instance only
+ * when all the 'I left' messages are sent, but it happens in a
+ * libev callback in the scheduler fiber where it is impossible
+ * to yield. So to that moment the handler should be dead already.
+ */
+static inline void
+swim_kill_event_handler(struct swim *swim)
 {
+	struct fiber *f = swim->event_handler;
+	/*
+	 * Nullify so as not to keep pointer at a fiber when it is
+	 * reused.
+	 */
+	swim->event_handler = NULL;
+	fiber_wakeup(f);
+	fiber_cancel(f);
+	fiber_join(f);
+}
+
+void
+swim_delete(struct swim *swim)
+{
+	if (swim->event_handler != NULL)
+		swim_kill_event_handler(swim);
 	struct ev_loop *l = swim_loop();
 	swim_scheduler_destroy(&swim->scheduler);
 	swim_ev_timer_stop(l, &swim->round_tick);
@@ -2119,14 +2136,6 @@ swim_do_delete(struct swim *swim)
 	free(swim);
 }
 
-void
-swim_delete(struct swim *swim)
-{
-	fiber_wakeup(swim->event_handler);
-	fiber_cancel(swim->event_handler);
-	fiber_sleep(0);
-}
-
 /**
  * Quit message is broadcasted in the same way as round messages,
  * step by step, with the only difference that quit round steps
@@ -2139,8 +2148,12 @@ swim_quit_step_complete(struct swim_task *task,
 	(void) rc;
 	struct swim *swim = swim_by_scheduler(scheduler);
 	if (rlist_empty(&swim->round_queue)) {
-		fiber_wakeup(swim->event_handler);
-		fiber_cancel(swim->event_handler);
+		/*
+		 * The handler should be dead - can't yield here,
+		 * it is the scheduler fiber.
+		 */
+		assert(swim->event_handler == NULL);
+		swim_delete(swim);
 		return;
 	}
 	struct swim_member *m =
@@ -2169,6 +2182,11 @@ void
 swim_quit(struct swim *swim)
 {
 	assert(swim_is_configured(swim));
+	/*
+	 * Kill the handler now. Later it will be impossible to do
+	 * from the scheduler fiber.
+	 */
+	swim_kill_event_handler(swim);
 	struct ev_loop *l = swim_loop();
 	swim_ev_timer_stop(l, &swim->round_tick);
 	swim_ev_timer_stop(l, &swim->wait_ack_tick);
diff --git a/test/unit/swim.c b/test/unit/swim.c
index 8bf9b0eba..0e33d691c 100644
--- a/test/unit/swim.c
+++ b/test/unit/swim.c
@@ -986,6 +986,15 @@ swim_trigger_destroy_cb(struct trigger *t)
 	((struct trigger_ctx *) t->data)->is_deleted = true;
 }
 
+static int
+swim_cluster_delete_f(va_list ap)
+{
+	struct swim_cluster *c = (struct swim_cluster *)
+		va_arg(ap, struct swim_cluster *);
+	swim_cluster_delete(c);
+	return 0;
+}
+
 static void
 swim_test_triggers(void)
 {
@@ -1065,7 +1074,9 @@ swim_test_triggers(void)
 	is(tctx2.counter, 1, "yielding trigger is fired");
 	is(tctx.counter, 6, "non-yielding still is not");
 
-	swim_cluster_delete(cluster);
+	struct fiber *async_delete_fiber =
+		fiber_new("async delete", swim_cluster_delete_f);
+	fiber_start(async_delete_fiber, cluster);
 	ok(! tctx.is_deleted, "trigger is not deleted until all currently "\
 	   "sleeping triggers are finished");
 	tctx2.need_sleep = false;

=======================================================================

>> +	swim->worker = fiber_new("SWIM worker", swim_worker_f);
>> +	if (swim->worker == NULL) {
>> +		free(swim);
>> +		return NULL;
>> +	}
> 
>>  	swim->members = mh_swim_table_new();
>>  	if (swim->members == NULL) {
>> +		fiber_cancel(swim->worker);
> 
> Better to start the worker last, then you will have easy time
> destroying it on failure.  This is a minor nit, ignore if you wish.

=======================================================================

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index ae66ce198..00234d1df 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -1791,15 +1791,8 @@ swim_new(void)
 		diag_set(OutOfMemory, sizeof(*swim), "calloc", "swim");
 		return NULL;
 	}
-	swim->event_handler = fiber_new("SWIM event handler",
-					swim_event_handler_f);
-	if (swim->event_handler == NULL) {
-		free(swim);
-		return NULL;
-	}
 	swim->members = mh_swim_table_new();
 	if (swim->members == NULL) {
-		fiber_cancel(swim->event_handler);
 		free(swim);
 		diag_set(OutOfMemory, sizeof(*swim->members),
 			 "mh_swim_table_new", "members");
@@ -1824,7 +1817,13 @@ swim_new(void)
 	rlist_create(&swim->dissemination_queue);
 	rlist_create(&swim->on_member_event);
 	stailq_create(&swim->event_queue);
-
+	swim->event_handler = fiber_new("SWIM event handler",
+					swim_event_handler_f);
+	if (swim->event_handler == NULL) {
+		swim_delete(swim);
+		return NULL;
+	}
+	fiber_set_joinable(swim->event_handler, true);
 	fiber_start(swim->event_handler, swim);
 	return swim;
 }




More information about the Tarantool-patches mailing list