[tarantool-patches] Re: [PATCH 2/5] swim: introduce member reference API

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Thu May 2 18:10:01 MSK 2019



On 01/05/2019 08:15, Konstantin Osipov wrote:
> * Vladislav Shpilevoy <v.shpilevoy at tarantool.org> [19/05/01 08:07]:
> 
> I disagree with adding a member status for dropped member.
> 
> By resetting the status you lose the information about the
> original status, as set when leaving the membership table.

My motivation of resetting the status was that a user can't
rely on that anymore, it is not valid. If a dropped member's
status is ALIVE, it does not mean, that it is really alive.
User's application should not use the old status.

And I wanted to force a user to stop looking at that invalid
member. Also new status allows to do not add a new function to
check member validity.

Please, reconsider this implementation again after my explanation
above. In case I did not persuade you, I implemented a
flag-based patch, see at the end of the mail.

> Anyway, member status is part of the swim protocol, and should not
> be used to reflect any memory management details specific to the
> programming language in use.
> 
> Please either have a separate status for zombie members 
> or simply check for refs == 1.

I thought about that, refs == 1 does not work. Consider this:
I do swim:find_member() 3 times in Lua. Now ref count of the
member is 4. Then the member is dropped. Its ref count == 3,
but it is dropped. So refs == 1 does not work as a flag.

I added a flag. But please, think again about status invalidation -
it can prevent fallacious belief in status validity in user space.

===================================================================

commit 0040b1966de9ca669676420a5e6c86aacd2f001e
Author: Vladislav Shpilevoy <v.shpilevoy at tarantool.org>
Date:   Tue Apr 30 18:46:58 2019 +0300

    swim: introduce member reference API
    
    Struct swim_member pointer is used to learn member's status,
    payload, incarnation etc. To obtain a pointer, a one should
    either lookup the member by UUID, or obtain from iterators API.
    The former is long, the latter is useless when a point lookup is
    needed.
    
    On the other hand, it was not safe to keep struct swim_member
    pointer for a long time, because it could be deleted at any
    moment.
    
    This patch allows to reference a member and be sure that it will
    not be deleted until dereferenced explicitly. The member still
    can be dropped from the member table, but its memory will be
    valid. To detect that a member is dropped, a user can use
    swim_member_is_dropped() function.
    
    Part of #3234

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index dfee16493..ff95a8c2b 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -243,6 +243,19 @@ struct swim_member {
 	 * Position in a queue of members in the current round.
 	 */
 	struct rlist in_round_queue;
+	/**
+	 * Reference counter. Used by public API to prevent the
+	 * member deletion after it is obtained by UUID or from an
+	 * iterator.
+	 */
+	int refs;
+	/**
+	 * True, if the member was dropped from the member table.
+	 * At the same time it still can be not deleted, if users
+	 * of the public API referenced the member. Dropped member
+	 * is not valid anymore and should be dereferenced.
+	 */
+	bool is_dropped;
 	/**
 	 *
 	 *                 Dissemination component
@@ -638,11 +651,34 @@ swim_ping_task_complete(struct swim_task *task,
 	swim_wait_ack(swim, m, false);
 }
 
+void
+swim_member_ref(struct swim_member *member)
+{
+	++member->refs;
+}
+
+void
+swim_member_unref(struct swim_member *member)
+{
+	assert(member->refs > 0);
+	if (--member->refs == 0) {
+		free(member->payload);
+		free(member);
+	}
+}
+
+bool
+swim_member_is_dropped(const struct swim_member *member)
+{
+	return member->is_dropped;
+}
+
 /** Free member's resources. */
 static inline void
 swim_member_delete(struct swim_member *member)
 {
 	assert(rlist_empty(&member->in_round_queue));
+	member->is_dropped = true;
 
 	/* Failure detection component. */
 	assert(heap_node_is_stray(&member->in_wait_ack_heap));
@@ -651,9 +687,8 @@ swim_member_delete(struct swim_member *member)
 
 	/* Dissemination component. */
 	assert(rlist_empty(&member->in_dissemination_queue));
-	free(member->payload);
 
-	free(member);
+	swim_member_unref(member);
 }
 
 /** Create a new member. It is not registered anywhere here. */
@@ -667,6 +702,7 @@ swim_member_new(const struct sockaddr_in *addr, const struct tt_uuid *uuid,
 		diag_set(OutOfMemory, sizeof(*member), "calloc", "member");
 		return NULL;
 	}
+	member->refs = 1;
 	member->status = status;
 	member->addr = *addr;
 	member->uuid = *uuid;
@@ -1969,14 +2005,14 @@ swim_quit(struct swim *swim)
 	swim_quit_step_complete(task, &swim->scheduler, 0);
 }
 
-const struct swim_member *
+struct swim_member *
 swim_self(struct swim *swim)
 {
 	assert(swim_is_configured(swim));
 	return swim->self;
 }
 
-const struct swim_member *
+struct swim_member *
 swim_member_by_uuid(struct swim *swim, const struct tt_uuid *uuid)
 {
 	assert(swim_is_configured(swim));
@@ -1997,7 +2033,7 @@ swim_iterator_open(struct swim *swim)
 	return (struct swim_iterator *) swim;
 }
 
-const struct swim_member *
+struct swim_member *
 swim_iterator_next(struct swim_iterator *iterator)
 {
 	struct swim *swim = (struct swim *) iterator;
diff --git a/src/lib/swim/swim.h b/src/lib/swim/swim.h
index 653e45be7..2f53716c7 100644
--- a/src/lib/swim/swim.h
+++ b/src/lib/swim/swim.h
@@ -186,7 +186,7 @@ void
 swim_quit(struct swim *swim);
 
 /** Get a SWIM member, describing this instance. */
-const struct swim_member *
+struct swim_member *
 swim_self(struct swim *swim);
 
 /**
@@ -194,7 +194,7 @@ swim_self(struct swim *swim);
  * @retval NULL Not found.
  * @retval not NULL A member.
  */
-const struct swim_member *
+struct swim_member *
 swim_member_by_uuid(struct swim *swim, const struct tt_uuid *uuid);
 
 /** Member's current status. */
@@ -216,7 +216,7 @@ swim_iterator_open(struct swim *swim);
  * @retval NULL EOF.
  * @retval not NULL A valid member.
  */
-const struct swim_member *
+struct swim_member *
 swim_iterator_next(struct swim_iterator *iterator);
 
 /** Close an iterator. */
@@ -239,6 +239,28 @@ swim_member_incarnation(const struct swim_member *member);
 const char *
 swim_member_payload(const struct swim_member *member, uint16_t *size);
 
+/**
+ * Reference a member. The member memory will be valid until unref
+ * is called.
+ */
+void
+swim_member_ref(struct swim_member *member);
+
+/**
+ * Dereference a member. After that it may be deleted and can't be
+ * accessed anymore.
+ */
+void
+swim_member_unref(struct swim_member *member);
+
+/**
+ * Check if a member was dropped from the member table. It means,
+ * that the member is not valid anymore and should be
+ * dereferenced.
+ */
+bool
+swim_member_is_dropped(const struct swim_member *member);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/test/unit/swim.c b/test/unit/swim.c
index 5aaae089d..8303e2900 100644
--- a/test/unit/swim.c
+++ b/test/unit/swim.c
@@ -176,7 +176,7 @@ swim_test_cfg(void)
 static void
 swim_test_add_remove(void)
 {
-	swim_start_test(13);
+	swim_start_test(14);
 
 	struct swim_cluster *cluster = swim_cluster_new(2);
 	swim_cluster_add_link(cluster, 0, 1);
@@ -232,6 +232,17 @@ swim_test_add_remove(void)
 	swim_cluster_unblock_io(cluster, 0);
 	is(swim_cluster_wait_fullmesh(cluster, 1), 0,
 	   "back in fullmesh after a member removal in the middle of a step");
+	/*
+	 * Check that member removal does not delete a member,
+	 * only unrefs.
+	 */
+	const struct tt_uuid *s1_uuid = swim_member_uuid(swim_self(s1));
+	struct swim_member *s1_view = swim_member_by_uuid(s2, s1_uuid);
+	swim_member_ref(s1_view);
+	swim_remove_member(s2, s1_uuid);
+	ok(swim_member_is_dropped(s1_view), "if a referenced "\
+	   "member is dropped, it can be detected from the public API");
+	swim_member_unref(s1_view);
 
 	swim_cluster_delete(cluster);
 
diff --git a/test/unit/swim.result b/test/unit/swim.result
index 6e439541a..24597641c 100644
--- a/test/unit/swim.result
+++ b/test/unit/swim.result
@@ -47,7 +47,7 @@ ok 3 - subtests
 ok 4 - subtests
 	*** swim_test_cfg: done ***
 	*** swim_test_add_remove ***
-    1..13
+    1..14
     ok 1 - can not add an existing member
     ok 2 - diag says 'already exists'
     ok 3 - can not add a invalid uri
@@ -61,6 +61,7 @@ ok 4 - subtests
     ok 11 - after removal the cluster is not in fullmesh
     ok 12 - but it is back in 1 step
     ok 13 - back in fullmesh after a member removal in the middle of a step
+    ok 14 - if a referenced member is dropped, it can be detected from the public API
 ok 5 - subtests
 	*** swim_test_add_remove: done ***
 	*** swim_test_basic_failure_detection ***




More information about the Tarantool-patches mailing list