[PATCH 4/5] swim: introduce "suspected" status

Vladislav Shpilevoy v.shpilevoy at tarantool.org
Mon Dec 17 15:53:22 MSK 2018


Now a member dies "gradually". After some failed pings
it is declared suspected. After more failed pings
it is finally declared dead. New members in a config
are declared suspected because the instance cannot
be sure whether they are alive or not.

Follow up #3234
---
 src/lib/swim/swim.c    | 26 +++++++++++++++++++++++---
 test/swim/basic.result | 26 +++++++++++++-------------
 2 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/src/lib/swim/swim.c b/src/lib/swim/swim.c
index bbf6b7fd5..df57ef470 100644
--- a/src/lib/swim/swim.c
+++ b/src/lib/swim/swim.c
@@ -156,6 +156,12 @@ enum swim_member_status {
 	 * members table.
 	 */
 	MEMBER_ALIVE = 0,
+	/**
+	 * If a member has not responded to a ping, it is declared
+	 * as suspected to be dead. After more failed pings it
+	 * is finally dead.
+	 */
+	MEMBER_SUSPECTED,
 	/**
 	 * The member is considered to be dead. It will disappear
 	 * from the membership, if it is not pinned.
@@ -166,6 +172,7 @@ enum swim_member_status {
 
 static const char *swim_member_status_strs[] = {
 	"alive",
+	"suspected",
 	"dead",
 };
 
@@ -596,9 +603,15 @@ enum {
 	ACK_TIMEOUT = 1,
 	/**
 	 * If a member has not been responding to pings this
-	 * number of times, it is considered to be dead.
+	 * number of times, it is suspected to be dead. To confirm
+	 * the death it should fail more pings.
 	 */
-	NO_ACKS_TO_DEAD = 3,
+	NO_ACKS_TO_SUSPECT = 2,
+	/**
+	 * If a member is suspected to be dead, after this total
+	 * number of failed pings its death is confirmed.
+	 */
+	NO_ACKS_TO_DEAD = NO_ACKS_TO_SUSPECT + 2,
 	/**
 	 * If a not pinned member confirmed to be dead, it is
 	 * removed from the membership after at least this number
@@ -1015,6 +1028,9 @@ swim_check_acks(struct ev_loop *loop, struct ev_periodic *p, int events)
 		if (m->failed_pings >= NO_ACKS_TO_DEAD) {
 			m->status = MEMBER_DEAD;
 			swim_member_is_updated(m);
+		} else if (m->failed_pings >= NO_ACKS_TO_SUSPECT) {
+			m->status = MEMBER_SUSPECTED;
+			swim_member_is_updated(m);
 		}
 		swim_io_task_push(&m->ping_task);
 		rlist_del_entry(m, in_queue_wait_ack);
@@ -1505,7 +1521,11 @@ swim_cfg(const char **member_uris, int member_uri_count, const char *server_uri,
 		free(cfg);
 		for (int i = 0; i < new_cfg_size; ++i) {
 			new_cfg[i]->is_pinned = true;
-			new_cfg[i]->status = MEMBER_ALIVE;
+			/*
+			 * Real status is unknown, so a new member
+			 * can not be alive.
+			 */
+			new_cfg[i]->status = MEMBER_SUSPECTED;
 		}
 		cfg = new_cfg;
 		cfg_size = new_cfg_size;
diff --git a/test/swim/basic.result b/test/swim/basic.result
index 7d0131606..f223950a6 100644
--- a/test/swim/basic.result
+++ b/test/swim/basic.result
@@ -42,13 +42,13 @@ swim.cfg({members = members})
 swim_info_sorted()
 ---
 - - - 192.168.0.1:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.2:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.3:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
 ...
 swim.stop()
@@ -68,13 +68,13 @@ swim_info_sorted()
     - status: alive
       incarnation: 0
   - - 192.168.0.1:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.2:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.3:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
 ...
 swim.debug_round_step()
@@ -86,13 +86,13 @@ swim_info_sorted()
     - status: alive
       incarnation: 0
   - - 192.168.0.1:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.2:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.3:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
 ...
 swim.stop()
@@ -174,16 +174,16 @@ swim.cfg({server = listen_uri, members = members})
 swim_info_sorted()
 ---
 - - - 127.0.0.1:listen_port
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.1:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.2:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
   - - 192.168.0.3:3333
-    - status: alive
+    - status: suspected
       incarnation: 0
 ...
 swim.stop()
-- 
2.17.2 (Apple Git-113)




More information about the Tarantool-patches mailing list